From a243a2aac2e50144b9c3b58008f70d94e63ecd19 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 23 Nov 2023 05:36:37 +0000 Subject: [PATCH] deploy: 584d3da1443f7151969fd8d503ab219a38adc5e0 --- 0.5.10/cloud/billing/bills/index.html | 4 ++-- 0.5.10/cloud/billing/index.html | 4 ++-- 0.5.10/cloud/billing/recharge/index.html | 4 ++-- 0.5.10/cloud/billing/refund/index.html | 4 ++-- 0.5.10/cloud/billing/voucher/index.html | 4 ++-- 0.5.10/cloud/index.html | 4 ++-- 0.5.10/community/contribute/index.html | 4 ++-- 0.5.10/concepts/index.html | 4 ++-- 0.5.10/concepts/names/index.html | 4 ++-- 0.5.10/concepts/project/index.html | 4 ++-- 0.5.10/concepts/roles-permissions/index.html | 4 ++-- 0.5.10/concepts/versioning/index.html | 4 ++-- 0.5.10/dataset/index.html | 4 ++-- 0.5.10/dataset/yaml/index.html | 4 ++-- 0.5.10/evaluation/heterogeneous/node-able/index.html | 4 ++-- 0.5.10/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- 0.5.10/evaluation/index.html | 4 ++-- 0.5.10/faq/index.html | 4 ++-- 0.5.10/getting-started/cloud/index.html | 4 ++-- 0.5.10/getting-started/index.html | 4 ++-- 0.5.10/getting-started/runtime/index.html | 4 ++-- 0.5.10/getting-started/server/index.html | 4 ++-- 0.5.10/getting-started/standalone/index.html | 4 ++-- 0.5.10/index.html | 4 ++-- 0.5.10/model/index.html | 4 ++-- 0.5.10/model/yaml/index.html | 4 ++-- 0.5.10/reference/sdk/dataset/index.html | 4 ++-- 0.5.10/reference/sdk/evaluation/index.html | 4 ++-- 0.5.10/reference/sdk/model/index.html | 4 ++-- 0.5.10/reference/sdk/other/index.html | 4 ++-- 0.5.10/reference/sdk/overview/index.html | 4 ++-- 0.5.10/reference/sdk/type/index.html | 4 ++-- 0.5.10/reference/swcli/dataset/index.html | 4 ++-- 0.5.10/reference/swcli/index.html | 4 ++-- 0.5.10/reference/swcli/instance/index.html | 4 ++-- 0.5.10/reference/swcli/job/index.html | 4 ++-- 0.5.10/reference/swcli/model/index.html | 4 ++-- 0.5.10/reference/swcli/project/index.html | 4 ++-- 0.5.10/reference/swcli/runtime/index.html | 4 ++-- 
0.5.10/reference/swcli/utilities/index.html | 4 ++-- 0.5.10/runtime/index.html | 4 ++-- 0.5.10/runtime/yaml/index.html | 4 ++-- 0.5.10/server/guides/server_admin/index.html | 4 ++-- 0.5.10/server/index.html | 4 ++-- 0.5.10/server/installation/docker/index.html | 4 ++-- 0.5.10/server/installation/helm-charts/index.html | 4 ++-- 0.5.10/server/installation/index.html | 4 ++-- 0.5.10/server/installation/minikube/index.html | 4 ++-- 0.5.10/server/installation/starwhale_env/index.html | 4 ++-- 0.5.10/server/project/index.html | 4 ++-- 0.5.10/swcli/config/index.html | 4 ++-- 0.5.10/swcli/index.html | 4 ++-- 0.5.10/swcli/installation/index.html | 4 ++-- 0.5.10/swcli/swignore/index.html | 4 ++-- 0.5.10/swcli/uri/index.html | 4 ++-- 0.5.12/cloud/billing/bills/index.html | 4 ++-- 0.5.12/cloud/billing/index.html | 4 ++-- 0.5.12/cloud/billing/recharge/index.html | 4 ++-- 0.5.12/cloud/billing/refund/index.html | 4 ++-- 0.5.12/cloud/billing/voucher/index.html | 4 ++-- 0.5.12/cloud/index.html | 4 ++-- 0.5.12/community/contribute/index.html | 4 ++-- 0.5.12/concepts/index.html | 4 ++-- 0.5.12/concepts/names/index.html | 4 ++-- 0.5.12/concepts/project/index.html | 4 ++-- 0.5.12/concepts/roles-permissions/index.html | 4 ++-- 0.5.12/concepts/versioning/index.html | 4 ++-- 0.5.12/dataset/index.html | 4 ++-- 0.5.12/dataset/yaml/index.html | 4 ++-- 0.5.12/evaluation/heterogeneous/node-able/index.html | 4 ++-- 0.5.12/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- 0.5.12/evaluation/index.html | 4 ++-- 0.5.12/faq/index.html | 4 ++-- 0.5.12/getting-started/cloud/index.html | 4 ++-- 0.5.12/getting-started/index.html | 4 ++-- 0.5.12/getting-started/runtime/index.html | 4 ++-- 0.5.12/getting-started/server/index.html | 4 ++-- 0.5.12/getting-started/standalone/index.html | 4 ++-- 0.5.12/index.html | 4 ++-- 0.5.12/model/index.html | 4 ++-- 0.5.12/model/yaml/index.html | 4 ++-- 0.5.12/reference/sdk/dataset/index.html | 4 ++-- 0.5.12/reference/sdk/evaluation/index.html | 4 ++-- 
0.5.12/reference/sdk/job/index.html | 4 ++-- 0.5.12/reference/sdk/model/index.html | 4 ++-- 0.5.12/reference/sdk/other/index.html | 4 ++-- 0.5.12/reference/sdk/overview/index.html | 4 ++-- 0.5.12/reference/sdk/type/index.html | 4 ++-- 0.5.12/reference/swcli/dataset/index.html | 4 ++-- 0.5.12/reference/swcli/index.html | 4 ++-- 0.5.12/reference/swcli/instance/index.html | 4 ++-- 0.5.12/reference/swcli/job/index.html | 4 ++-- 0.5.12/reference/swcli/model/index.html | 4 ++-- 0.5.12/reference/swcli/project/index.html | 4 ++-- 0.5.12/reference/swcli/runtime/index.html | 4 ++-- 0.5.12/reference/swcli/utilities/index.html | 4 ++-- 0.5.12/runtime/index.html | 4 ++-- 0.5.12/runtime/yaml/index.html | 4 ++-- 0.5.12/server/guides/server_admin/index.html | 4 ++-- 0.5.12/server/index.html | 4 ++-- 0.5.12/server/installation/docker-compose/index.html | 4 ++-- 0.5.12/server/installation/docker/index.html | 4 ++-- 0.5.12/server/installation/helm-charts/index.html | 4 ++-- 0.5.12/server/installation/index.html | 4 ++-- 0.5.12/server/installation/minikube/index.html | 4 ++-- 0.5.12/server/installation/starwhale_env/index.html | 4 ++-- 0.5.12/server/project/index.html | 4 ++-- 0.5.12/swcli/config/index.html | 4 ++-- 0.5.12/swcli/index.html | 4 ++-- 0.5.12/swcli/installation/index.html | 4 ++-- 0.5.12/swcli/swignore/index.html | 4 ++-- 0.5.12/swcli/uri/index.html | 4 ++-- 0.6.0/cloud/billing/bills/index.html | 4 ++-- 0.6.0/cloud/billing/index.html | 4 ++-- 0.6.0/cloud/billing/recharge/index.html | 4 ++-- 0.6.0/cloud/billing/refund/index.html | 4 ++-- 0.6.0/cloud/billing/voucher/index.html | 4 ++-- 0.6.0/cloud/index.html | 4 ++-- 0.6.0/community/contribute/index.html | 4 ++-- 0.6.0/concepts/index.html | 4 ++-- 0.6.0/concepts/names/index.html | 4 ++-- 0.6.0/concepts/project/index.html | 4 ++-- 0.6.0/concepts/roles-permissions/index.html | 4 ++-- 0.6.0/concepts/versioning/index.html | 4 ++-- 0.6.0/dataset/index.html | 4 ++-- 0.6.0/dataset/yaml/index.html | 4 ++-- 
0.6.0/evaluation/heterogeneous/node-able/index.html | 4 ++-- 0.6.0/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- 0.6.0/evaluation/index.html | 4 ++-- 0.6.0/faq/index.html | 4 ++-- 0.6.0/getting-started/cloud/index.html | 4 ++-- 0.6.0/getting-started/index.html | 4 ++-- 0.6.0/getting-started/runtime/index.html | 4 ++-- 0.6.0/getting-started/server/index.html | 4 ++-- 0.6.0/getting-started/standalone/index.html | 4 ++-- 0.6.0/index.html | 4 ++-- 0.6.0/model/index.html | 4 ++-- 0.6.0/model/yaml/index.html | 4 ++-- 0.6.0/reference/sdk/dataset/index.html | 4 ++-- 0.6.0/reference/sdk/evaluation/index.html | 4 ++-- 0.6.0/reference/sdk/job/index.html | 4 ++-- 0.6.0/reference/sdk/model/index.html | 4 ++-- 0.6.0/reference/sdk/other/index.html | 4 ++-- 0.6.0/reference/sdk/overview/index.html | 4 ++-- 0.6.0/reference/sdk/type/index.html | 4 ++-- 0.6.0/reference/swcli/dataset/index.html | 4 ++-- 0.6.0/reference/swcli/index.html | 4 ++-- 0.6.0/reference/swcli/instance/index.html | 4 ++-- 0.6.0/reference/swcli/job/index.html | 4 ++-- 0.6.0/reference/swcli/model/index.html | 4 ++-- 0.6.0/reference/swcli/project/index.html | 4 ++-- 0.6.0/reference/swcli/runtime/index.html | 4 ++-- 0.6.0/reference/swcli/utilities/index.html | 4 ++-- 0.6.0/runtime/index.html | 4 ++-- 0.6.0/runtime/yaml/index.html | 4 ++-- 0.6.0/server/guides/server_admin/index.html | 4 ++-- 0.6.0/server/index.html | 4 ++-- 0.6.0/server/installation/docker-compose/index.html | 4 ++-- 0.6.0/server/installation/docker/index.html | 4 ++-- 0.6.0/server/installation/helm-charts/index.html | 4 ++-- 0.6.0/server/installation/index.html | 4 ++-- 0.6.0/server/installation/minikube/index.html | 4 ++-- 0.6.0/server/installation/starwhale_env/index.html | 4 ++-- 0.6.0/server/project/index.html | 4 ++-- 0.6.0/swcli/config/index.html | 4 ++-- 0.6.0/swcli/index.html | 4 ++-- 0.6.0/swcli/installation/index.html | 4 ++-- 0.6.0/swcli/swignore/index.html | 4 ++-- 0.6.0/swcli/uri/index.html | 4 ++-- 404.html | 4 ++-- 
assets/js/{2d78f039.7aad882f.js => 2d78f039.7193711f.js} | 2 +- assets/js/42d9f35f.71c9e63a.js | 1 - assets/js/42d9f35f.9ef03e55.js | 1 + assets/js/{e9fbe6ff.a91515bb.js => e9fbe6ff.644fbef9.js} | 2 +- .../{runtime~main.08eddf9a.js => runtime~main.4ebe5f19.js} | 2 +- blog/archive/index.html | 4 ++-- blog/index.html | 4 ++-- blog/intro-starwhale/index.html | 4 ++-- blog/reproduce-and-compare-evals/index.html | 4 ++-- blog/run-llama2-chat-in-five-minutes/index.html | 4 ++-- blog/tags/index.html | 4 ++-- blog/tags/intro/index.html | 4 ++-- blog/tags/llama-2/index.html | 4 ++-- blog/tags/model-evaluaitons/index.html | 4 ++-- blog/tags/model-package/index.html | 4 ++-- cloud/billing/bills/index.html | 4 ++-- cloud/billing/index.html | 4 ++-- cloud/billing/recharge/index.html | 4 ++-- cloud/billing/refund/index.html | 4 ++-- cloud/billing/voucher/index.html | 4 ++-- cloud/index.html | 4 ++-- community/contribute/index.html | 4 ++-- concepts/index.html | 4 ++-- concepts/names/index.html | 4 ++-- concepts/project/index.html | 4 ++-- concepts/roles-permissions/index.html | 4 ++-- concepts/versioning/index.html | 4 ++-- dataset/index.html | 4 ++-- dataset/yaml/index.html | 4 ++-- evaluation/heterogeneous/node-able/index.html | 4 ++-- evaluation/heterogeneous/virtual-node/index.html | 4 ++-- evaluation/index.html | 4 ++-- faq/index.html | 4 ++-- getting-started/cloud/index.html | 4 ++-- getting-started/index.html | 4 ++-- getting-started/runtime/index.html | 4 ++-- getting-started/server/index.html | 4 ++-- getting-started/standalone/index.html | 4 ++-- index.html | 4 ++-- model/index.html | 4 ++-- model/yaml/index.html | 4 ++-- next/cloud/billing/bills/index.html | 4 ++-- next/cloud/billing/index.html | 4 ++-- next/cloud/billing/recharge/index.html | 4 ++-- next/cloud/billing/refund/index.html | 4 ++-- next/cloud/billing/voucher/index.html | 4 ++-- next/cloud/index.html | 4 ++-- next/community/contribute/index.html | 4 ++-- next/concepts/index.html | 4 ++-- 
next/concepts/names/index.html | 4 ++-- next/concepts/project/index.html | 4 ++-- next/concepts/roles-permissions/index.html | 4 ++-- next/concepts/versioning/index.html | 4 ++-- next/dataset/index.html | 4 ++-- next/dataset/yaml/index.html | 4 ++-- next/evaluation/heterogeneous/node-able/index.html | 4 ++-- next/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- next/evaluation/index.html | 4 ++-- next/faq/index.html | 4 ++-- next/getting-started/cloud/index.html | 6 +++--- next/getting-started/index.html | 4 ++-- next/getting-started/runtime/index.html | 4 ++-- next/getting-started/server/index.html | 6 +++--- next/getting-started/standalone/index.html | 6 +++--- next/index.html | 4 ++-- next/model/index.html | 4 ++-- next/model/yaml/index.html | 4 ++-- next/reference/sdk/dataset/index.html | 4 ++-- next/reference/sdk/evaluation/index.html | 4 ++-- next/reference/sdk/job/index.html | 4 ++-- next/reference/sdk/model/index.html | 4 ++-- next/reference/sdk/other/index.html | 4 ++-- next/reference/sdk/overview/index.html | 4 ++-- next/reference/sdk/type/index.html | 4 ++-- next/reference/swcli/dataset/index.html | 4 ++-- next/reference/swcli/index.html | 4 ++-- next/reference/swcli/instance/index.html | 4 ++-- next/reference/swcli/job/index.html | 4 ++-- next/reference/swcli/model/index.html | 4 ++-- next/reference/swcli/project/index.html | 4 ++-- next/reference/swcli/runtime/index.html | 4 ++-- next/reference/swcli/utilities/index.html | 4 ++-- next/runtime/index.html | 4 ++-- next/runtime/yaml/index.html | 4 ++-- next/server/guides/server_admin/index.html | 4 ++-- next/server/index.html | 4 ++-- next/server/installation/docker-compose/index.html | 4 ++-- next/server/installation/docker/index.html | 4 ++-- next/server/installation/index.html | 4 ++-- next/server/installation/k8s-cluster/index.html | 4 ++-- next/server/installation/minikube/index.html | 4 ++-- next/server/installation/starwhale_env/index.html | 4 ++-- next/server/project/index.html | 4 ++-- 
next/swcli/config/index.html | 4 ++-- next/swcli/index.html | 4 ++-- next/swcli/installation/index.html | 4 ++-- next/swcli/swignore/index.html | 4 ++-- next/swcli/uri/index.html | 4 ++-- reference/sdk/dataset/index.html | 4 ++-- reference/sdk/evaluation/index.html | 4 ++-- reference/sdk/job/index.html | 4 ++-- reference/sdk/model/index.html | 4 ++-- reference/sdk/other/index.html | 4 ++-- reference/sdk/overview/index.html | 4 ++-- reference/sdk/type/index.html | 4 ++-- reference/swcli/dataset/index.html | 4 ++-- reference/swcli/index.html | 4 ++-- reference/swcli/instance/index.html | 4 ++-- reference/swcli/job/index.html | 4 ++-- reference/swcli/model/index.html | 4 ++-- reference/swcli/project/index.html | 4 ++-- reference/swcli/runtime/index.html | 4 ++-- reference/swcli/utilities/index.html | 4 ++-- runtime/index.html | 4 ++-- runtime/yaml/index.html | 4 ++-- server/guides/server_admin/index.html | 4 ++-- server/index.html | 4 ++-- server/installation/docker-compose/index.html | 4 ++-- server/installation/docker/index.html | 4 ++-- server/installation/helm-charts/index.html | 4 ++-- server/installation/index.html | 4 ++-- server/installation/minikube/index.html | 4 ++-- server/installation/starwhale_env/index.html | 4 ++-- server/project/index.html | 4 ++-- swcli/config/index.html | 4 ++-- swcli/index.html | 4 ++-- swcli/installation/index.html | 4 ++-- swcli/swignore/index.html | 4 ++-- swcli/uri/index.html | 4 ++-- zh/0.5.10/cloud/billing/bills/index.html | 4 ++-- zh/0.5.10/cloud/billing/index.html | 4 ++-- zh/0.5.10/cloud/billing/recharge/index.html | 4 ++-- zh/0.5.10/cloud/billing/refund/index.html | 4 ++-- zh/0.5.10/cloud/billing/voucher/index.html | 4 ++-- zh/0.5.10/cloud/index.html | 4 ++-- zh/0.5.10/community/contribute/index.html | 4 ++-- zh/0.5.10/concepts/index.html | 4 ++-- zh/0.5.10/concepts/names/index.html | 4 ++-- zh/0.5.10/concepts/project/index.html | 4 ++-- zh/0.5.10/concepts/roles-permissions/index.html | 4 ++-- 
zh/0.5.10/concepts/versioning/index.html | 4 ++-- zh/0.5.10/dataset/index.html | 4 ++-- zh/0.5.10/dataset/yaml/index.html | 4 ++-- zh/0.5.10/evaluation/heterogeneous/node-able/index.html | 4 ++-- zh/0.5.10/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- zh/0.5.10/evaluation/index.html | 4 ++-- zh/0.5.10/faq/index.html | 4 ++-- zh/0.5.10/getting-started/cloud/index.html | 4 ++-- zh/0.5.10/getting-started/index.html | 4 ++-- zh/0.5.10/getting-started/runtime/index.html | 4 ++-- zh/0.5.10/getting-started/server/index.html | 4 ++-- zh/0.5.10/getting-started/standalone/index.html | 4 ++-- zh/0.5.10/index.html | 4 ++-- zh/0.5.10/model/index.html | 4 ++-- zh/0.5.10/model/yaml/index.html | 4 ++-- zh/0.5.10/reference/sdk/dataset/index.html | 4 ++-- zh/0.5.10/reference/sdk/evaluation/index.html | 4 ++-- zh/0.5.10/reference/sdk/model/index.html | 4 ++-- zh/0.5.10/reference/sdk/other/index.html | 4 ++-- zh/0.5.10/reference/sdk/overview/index.html | 4 ++-- zh/0.5.10/reference/sdk/type/index.html | 4 ++-- zh/0.5.10/reference/swcli/dataset/index.html | 4 ++-- zh/0.5.10/reference/swcli/index.html | 4 ++-- zh/0.5.10/reference/swcli/instance/index.html | 4 ++-- zh/0.5.10/reference/swcli/job/index.html | 4 ++-- zh/0.5.10/reference/swcli/model/index.html | 4 ++-- zh/0.5.10/reference/swcli/project/index.html | 4 ++-- zh/0.5.10/reference/swcli/runtime/index.html | 4 ++-- zh/0.5.10/reference/swcli/utilities/index.html | 4 ++-- zh/0.5.10/runtime/index.html | 4 ++-- zh/0.5.10/runtime/yaml/index.html | 4 ++-- zh/0.5.10/server/guides/server_admin/index.html | 4 ++-- zh/0.5.10/server/index.html | 4 ++-- zh/0.5.10/server/installation/docker/index.html | 4 ++-- zh/0.5.10/server/installation/helm-charts/index.html | 4 ++-- zh/0.5.10/server/installation/index.html | 4 ++-- zh/0.5.10/server/installation/minikube/index.html | 4 ++-- zh/0.5.10/server/installation/starwhale_env/index.html | 4 ++-- zh/0.5.10/server/project/index.html | 4 ++-- zh/0.5.10/swcli/config/index.html | 4 ++-- 
zh/0.5.10/swcli/index.html | 4 ++-- zh/0.5.10/swcli/installation/index.html | 4 ++-- zh/0.5.10/swcli/swignore/index.html | 4 ++-- zh/0.5.10/swcli/uri/index.html | 4 ++-- zh/0.5.12/cloud/billing/bills/index.html | 4 ++-- zh/0.5.12/cloud/billing/index.html | 4 ++-- zh/0.5.12/cloud/billing/recharge/index.html | 4 ++-- zh/0.5.12/cloud/billing/refund/index.html | 4 ++-- zh/0.5.12/cloud/billing/voucher/index.html | 4 ++-- zh/0.5.12/cloud/index.html | 4 ++-- zh/0.5.12/community/contribute/index.html | 4 ++-- zh/0.5.12/concepts/index.html | 4 ++-- zh/0.5.12/concepts/names/index.html | 4 ++-- zh/0.5.12/concepts/project/index.html | 4 ++-- zh/0.5.12/concepts/roles-permissions/index.html | 4 ++-- zh/0.5.12/concepts/versioning/index.html | 4 ++-- zh/0.5.12/dataset/index.html | 4 ++-- zh/0.5.12/dataset/yaml/index.html | 4 ++-- zh/0.5.12/evaluation/heterogeneous/node-able/index.html | 4 ++-- zh/0.5.12/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- zh/0.5.12/evaluation/index.html | 4 ++-- zh/0.5.12/faq/index.html | 4 ++-- zh/0.5.12/getting-started/cloud/index.html | 4 ++-- zh/0.5.12/getting-started/index.html | 4 ++-- zh/0.5.12/getting-started/runtime/index.html | 4 ++-- zh/0.5.12/getting-started/server/index.html | 4 ++-- zh/0.5.12/getting-started/standalone/index.html | 4 ++-- zh/0.5.12/index.html | 4 ++-- zh/0.5.12/model/index.html | 4 ++-- zh/0.5.12/model/yaml/index.html | 4 ++-- zh/0.5.12/reference/sdk/dataset/index.html | 4 ++-- zh/0.5.12/reference/sdk/evaluation/index.html | 4 ++-- zh/0.5.12/reference/sdk/job/index.html | 4 ++-- zh/0.5.12/reference/sdk/model/index.html | 4 ++-- zh/0.5.12/reference/sdk/other/index.html | 4 ++-- zh/0.5.12/reference/sdk/overview/index.html | 4 ++-- zh/0.5.12/reference/sdk/type/index.html | 4 ++-- zh/0.5.12/reference/swcli/dataset/index.html | 4 ++-- zh/0.5.12/reference/swcli/index.html | 4 ++-- zh/0.5.12/reference/swcli/instance/index.html | 4 ++-- zh/0.5.12/reference/swcli/job/index.html | 4 ++-- 
zh/0.5.12/reference/swcli/model/index.html | 4 ++-- zh/0.5.12/reference/swcli/project/index.html | 4 ++-- zh/0.5.12/reference/swcli/runtime/index.html | 4 ++-- zh/0.5.12/reference/swcli/utilities/index.html | 4 ++-- zh/0.5.12/runtime/index.html | 4 ++-- zh/0.5.12/runtime/yaml/index.html | 4 ++-- zh/0.5.12/server/guides/server_admin/index.html | 4 ++-- zh/0.5.12/server/index.html | 4 ++-- zh/0.5.12/server/installation/docker-compose/index.html | 4 ++-- zh/0.5.12/server/installation/docker/index.html | 4 ++-- zh/0.5.12/server/installation/helm-charts/index.html | 4 ++-- zh/0.5.12/server/installation/index.html | 4 ++-- zh/0.5.12/server/installation/minikube/index.html | 4 ++-- zh/0.5.12/server/installation/starwhale_env/index.html | 4 ++-- zh/0.5.12/server/project/index.html | 4 ++-- zh/0.5.12/swcli/config/index.html | 4 ++-- zh/0.5.12/swcli/index.html | 4 ++-- zh/0.5.12/swcli/installation/index.html | 4 ++-- zh/0.5.12/swcli/swignore/index.html | 4 ++-- zh/0.5.12/swcli/uri/index.html | 4 ++-- zh/0.6.0/cloud/billing/bills/index.html | 4 ++-- zh/0.6.0/cloud/billing/index.html | 4 ++-- zh/0.6.0/cloud/billing/recharge/index.html | 4 ++-- zh/0.6.0/cloud/billing/refund/index.html | 4 ++-- zh/0.6.0/cloud/billing/voucher/index.html | 4 ++-- zh/0.6.0/cloud/index.html | 4 ++-- zh/0.6.0/community/contribute/index.html | 4 ++-- zh/0.6.0/concepts/index.html | 4 ++-- zh/0.6.0/concepts/names/index.html | 4 ++-- zh/0.6.0/concepts/project/index.html | 4 ++-- zh/0.6.0/concepts/roles-permissions/index.html | 4 ++-- zh/0.6.0/concepts/versioning/index.html | 4 ++-- zh/0.6.0/dataset/index.html | 4 ++-- zh/0.6.0/dataset/yaml/index.html | 4 ++-- zh/0.6.0/evaluation/heterogeneous/node-able/index.html | 4 ++-- zh/0.6.0/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- zh/0.6.0/evaluation/index.html | 4 ++-- zh/0.6.0/faq/index.html | 4 ++-- zh/0.6.0/getting-started/cloud/index.html | 4 ++-- zh/0.6.0/getting-started/index.html | 4 ++-- zh/0.6.0/getting-started/runtime/index.html | 4 
++-- zh/0.6.0/getting-started/server/index.html | 4 ++-- zh/0.6.0/getting-started/standalone/index.html | 4 ++-- zh/0.6.0/index.html | 4 ++-- zh/0.6.0/model/index.html | 4 ++-- zh/0.6.0/model/yaml/index.html | 4 ++-- zh/0.6.0/reference/sdk/dataset/index.html | 4 ++-- zh/0.6.0/reference/sdk/evaluation/index.html | 4 ++-- zh/0.6.0/reference/sdk/job/index.html | 4 ++-- zh/0.6.0/reference/sdk/model/index.html | 4 ++-- zh/0.6.0/reference/sdk/other/index.html | 4 ++-- zh/0.6.0/reference/sdk/overview/index.html | 4 ++-- zh/0.6.0/reference/sdk/type/index.html | 4 ++-- zh/0.6.0/reference/swcli/dataset/index.html | 4 ++-- zh/0.6.0/reference/swcli/index.html | 4 ++-- zh/0.6.0/reference/swcli/instance/index.html | 4 ++-- zh/0.6.0/reference/swcli/job/index.html | 4 ++-- zh/0.6.0/reference/swcli/model/index.html | 4 ++-- zh/0.6.0/reference/swcli/project/index.html | 4 ++-- zh/0.6.0/reference/swcli/runtime/index.html | 4 ++-- zh/0.6.0/reference/swcli/utilities/index.html | 4 ++-- zh/0.6.0/runtime/index.html | 4 ++-- zh/0.6.0/runtime/yaml/index.html | 4 ++-- zh/0.6.0/server/guides/server_admin/index.html | 4 ++-- zh/0.6.0/server/index.html | 4 ++-- zh/0.6.0/server/installation/docker-compose/index.html | 4 ++-- zh/0.6.0/server/installation/docker/index.html | 4 ++-- zh/0.6.0/server/installation/helm-charts/index.html | 4 ++-- zh/0.6.0/server/installation/index.html | 4 ++-- zh/0.6.0/server/installation/minikube/index.html | 4 ++-- zh/0.6.0/server/installation/starwhale_env/index.html | 4 ++-- zh/0.6.0/server/project/index.html | 4 ++-- zh/0.6.0/swcli/config/index.html | 4 ++-- zh/0.6.0/swcli/index.html | 4 ++-- zh/0.6.0/swcli/installation/index.html | 4 ++-- zh/0.6.0/swcli/swignore/index.html | 4 ++-- zh/0.6.0/swcli/uri/index.html | 4 ++-- zh/404.html | 4 ++-- zh/assets/js/{20d3256d.312abaae.js => 20d3256d.a7d310f4.js} | 2 +- zh/assets/js/94a33573.c6306e92.js | 1 - zh/assets/js/94a33573.dc6c4873.js | 1 + zh/assets/js/{d8b4b029.5d9c1fa7.js => d8b4b029.3b540579.js} | 2 +- 
.../{runtime~main.c6a269b9.js => runtime~main.1c413bf6.js} | 2 +- zh/blog/archive/index.html | 4 ++-- zh/blog/index.html | 4 ++-- zh/blog/intro-starwhale/index.html | 4 ++-- zh/blog/reproduce-and-compare-evals/index.html | 4 ++-- zh/blog/run-llama2-chat-in-five-minutes/index.html | 4 ++-- zh/blog/tags/index.html | 4 ++-- zh/blog/tags/intro/index.html | 4 ++-- zh/blog/tags/llama-2/index.html | 4 ++-- "zh/blog/tags/\346\250\241\345\236\213/index.html" | 4 ++-- .../index.html" | 4 ++-- zh/cloud/billing/bills/index.html | 4 ++-- zh/cloud/billing/index.html | 4 ++-- zh/cloud/billing/recharge/index.html | 4 ++-- zh/cloud/billing/refund/index.html | 4 ++-- zh/cloud/billing/voucher/index.html | 4 ++-- zh/cloud/index.html | 4 ++-- zh/community/contribute/index.html | 4 ++-- zh/concepts/index.html | 4 ++-- zh/concepts/names/index.html | 4 ++-- zh/concepts/project/index.html | 4 ++-- zh/concepts/roles-permissions/index.html | 4 ++-- zh/concepts/versioning/index.html | 4 ++-- zh/dataset/index.html | 4 ++-- zh/dataset/yaml/index.html | 4 ++-- zh/evaluation/heterogeneous/node-able/index.html | 4 ++-- zh/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- zh/evaluation/index.html | 4 ++-- zh/faq/index.html | 4 ++-- zh/getting-started/cloud/index.html | 4 ++-- zh/getting-started/index.html | 4 ++-- zh/getting-started/runtime/index.html | 4 ++-- zh/getting-started/server/index.html | 4 ++-- zh/getting-started/standalone/index.html | 4 ++-- zh/index.html | 4 ++-- zh/model/index.html | 4 ++-- zh/model/yaml/index.html | 4 ++-- zh/next/cloud/billing/bills/index.html | 4 ++-- zh/next/cloud/billing/index.html | 4 ++-- zh/next/cloud/billing/recharge/index.html | 4 ++-- zh/next/cloud/billing/refund/index.html | 4 ++-- zh/next/cloud/billing/voucher/index.html | 4 ++-- zh/next/cloud/index.html | 4 ++-- zh/next/community/contribute/index.html | 4 ++-- zh/next/concepts/index.html | 4 ++-- zh/next/concepts/names/index.html | 4 ++-- zh/next/concepts/project/index.html | 4 ++-- 
zh/next/concepts/roles-permissions/index.html | 4 ++-- zh/next/concepts/versioning/index.html | 4 ++-- zh/next/dataset/index.html | 4 ++-- zh/next/dataset/yaml/index.html | 4 ++-- zh/next/evaluation/heterogeneous/node-able/index.html | 4 ++-- zh/next/evaluation/heterogeneous/virtual-node/index.html | 4 ++-- zh/next/evaluation/index.html | 4 ++-- zh/next/faq/index.html | 4 ++-- zh/next/getting-started/cloud/index.html | 6 +++--- zh/next/getting-started/index.html | 4 ++-- zh/next/getting-started/runtime/index.html | 4 ++-- zh/next/getting-started/server/index.html | 6 +++--- zh/next/getting-started/standalone/index.html | 6 +++--- zh/next/index.html | 4 ++-- zh/next/model/index.html | 4 ++-- zh/next/model/yaml/index.html | 4 ++-- zh/next/reference/sdk/dataset/index.html | 4 ++-- zh/next/reference/sdk/evaluation/index.html | 4 ++-- zh/next/reference/sdk/job/index.html | 4 ++-- zh/next/reference/sdk/model/index.html | 4 ++-- zh/next/reference/sdk/other/index.html | 4 ++-- zh/next/reference/sdk/overview/index.html | 4 ++-- zh/next/reference/sdk/type/index.html | 4 ++-- zh/next/reference/swcli/dataset/index.html | 4 ++-- zh/next/reference/swcli/index.html | 4 ++-- zh/next/reference/swcli/instance/index.html | 4 ++-- zh/next/reference/swcli/job/index.html | 4 ++-- zh/next/reference/swcli/model/index.html | 4 ++-- zh/next/reference/swcli/project/index.html | 4 ++-- zh/next/reference/swcli/runtime/index.html | 4 ++-- zh/next/reference/swcli/utilities/index.html | 4 ++-- zh/next/runtime/index.html | 4 ++-- zh/next/runtime/yaml/index.html | 4 ++-- zh/next/server/guides/server_admin/index.html | 4 ++-- zh/next/server/index.html | 4 ++-- zh/next/server/installation/docker-compose/index.html | 4 ++-- zh/next/server/installation/docker/index.html | 4 ++-- zh/next/server/installation/index.html | 4 ++-- zh/next/server/installation/k8s-cluster/index.html | 4 ++-- zh/next/server/installation/minikube/index.html | 4 ++-- zh/next/server/installation/starwhale_env/index.html | 4 ++-- 
zh/next/server/project/index.html | 4 ++-- zh/next/swcli/config/index.html | 4 ++-- zh/next/swcli/index.html | 4 ++-- zh/next/swcli/installation/index.html | 4 ++-- zh/next/swcli/swignore/index.html | 4 ++-- zh/next/swcli/uri/index.html | 4 ++-- zh/reference/sdk/dataset/index.html | 4 ++-- zh/reference/sdk/evaluation/index.html | 4 ++-- zh/reference/sdk/job/index.html | 4 ++-- zh/reference/sdk/model/index.html | 4 ++-- zh/reference/sdk/other/index.html | 4 ++-- zh/reference/sdk/overview/index.html | 4 ++-- zh/reference/sdk/type/index.html | 4 ++-- zh/reference/swcli/dataset/index.html | 4 ++-- zh/reference/swcli/index.html | 4 ++-- zh/reference/swcli/instance/index.html | 4 ++-- zh/reference/swcli/job/index.html | 4 ++-- zh/reference/swcli/model/index.html | 4 ++-- zh/reference/swcli/project/index.html | 4 ++-- zh/reference/swcli/runtime/index.html | 4 ++-- zh/reference/swcli/utilities/index.html | 4 ++-- zh/runtime/index.html | 4 ++-- zh/runtime/yaml/index.html | 4 ++-- zh/server/guides/server_admin/index.html | 4 ++-- zh/server/index.html | 4 ++-- zh/server/installation/docker-compose/index.html | 4 ++-- zh/server/installation/docker/index.html | 4 ++-- zh/server/installation/helm-charts/index.html | 4 ++-- zh/server/installation/index.html | 4 ++-- zh/server/installation/minikube/index.html | 4 ++-- zh/server/installation/starwhale_env/index.html | 4 ++-- zh/server/project/index.html | 4 ++-- zh/swcli/config/index.html | 4 ++-- zh/swcli/index.html | 4 ++-- zh/swcli/installation/index.html | 4 ++-- zh/swcli/swignore/index.html | 4 ++-- zh/swcli/uri/index.html | 4 ++-- 598 files changed, 1190 insertions(+), 1190 deletions(-) rename assets/js/{2d78f039.7aad882f.js => 2d78f039.7193711f.js} (52%) delete mode 100644 assets/js/42d9f35f.71c9e63a.js create mode 100644 assets/js/42d9f35f.9ef03e55.js rename assets/js/{e9fbe6ff.a91515bb.js => e9fbe6ff.644fbef9.js} (71%) rename assets/js/{runtime~main.08eddf9a.js => runtime~main.4ebe5f19.js} (98%) rename 
zh/assets/js/{20d3256d.312abaae.js => 20d3256d.a7d310f4.js} (78%) delete mode 100644 zh/assets/js/94a33573.c6306e92.js create mode 100644 zh/assets/js/94a33573.dc6c4873.js rename zh/assets/js/{d8b4b029.5d9c1fa7.js => d8b4b029.3b540579.js} (79%) rename zh/assets/js/{runtime~main.c6a269b9.js => runtime~main.1c413bf6.js} (98%) diff --git a/0.5.10/cloud/billing/bills/index.html b/0.5.10/cloud/billing/bills/index.html index 7f9b7b472..85a071cd4 100644 --- a/0.5.10/cloud/billing/bills/index.html +++ b/0.5.10/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
- + \ No newline at end of file diff --git a/0.5.10/cloud/billing/index.html b/0.5.10/cloud/billing/index.html index fa670b3cc..53c509e6a 100644 --- a/0.5.10/cloud/billing/index.html +++ b/0.5.10/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
- + \ No newline at end of file diff --git a/0.5.10/cloud/billing/recharge/index.html b/0.5.10/cloud/billing/recharge/index.html index 6421ce99a..6937f4d44 100644 --- a/0.5.10/cloud/billing/recharge/index.html +++ b/0.5.10/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
- + \ No newline at end of file diff --git a/0.5.10/cloud/billing/refund/index.html b/0.5.10/cloud/billing/refund/index.html index 8822f7a6d..068af2413 100644 --- a/0.5.10/cloud/billing/refund/index.html +++ b/0.5.10/cloud/billing/refund/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
- + \ No newline at end of file diff --git a/0.5.10/cloud/billing/voucher/index.html b/0.5.10/cloud/billing/voucher/index.html index 4cce957d6..be75d1a22 100644 --- a/0.5.10/cloud/billing/voucher/index.html +++ b/0.5.10/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
- + \ No newline at end of file diff --git a/0.5.10/cloud/index.html b/0.5.10/cloud/index.html index c60902516..bb2ea3f92 100644 --- a/0.5.10/cloud/index.html +++ b/0.5.10/cloud/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

Starwhale Cloud User Guide

Starwhale Cloud is a service hosted on public cloud and operated by the Starwhale team. The access url is https://cloud.starwhale.cn.

- + \ No newline at end of file diff --git a/0.5.10/community/contribute/index.html b/0.5.10/community/contribute/index.html index 3203b6630..73ab12f7e 100644 --- a/0.5.10/community/contribute/index.html +++ b/0.5.10/community/contribute/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

Contribute to Starwhale

Getting Involved/Contributing

We welcome and encourage all contributions to Starwhale, including and not limited to:

  • Describe the problems encountered during use.
  • Submit feature request.
  • Discuss in Slack and Github Issues.
  • Code Review.
  • Improve docs, tutorials and examples.
  • Fix Bug.
  • Add Test Case.
  • Code readability and code comments to import readability.
  • Develop new features.
  • Write enhancement proposal.

You can get involved, get updates and contact Starwhale developers in the following ways:

Starwhale Resources

Code Structure

  • client: swcli and Python SDK with Pure Python3, which includes all Standalone Instance features.
    • api: Python SDK.
    • cli: Command Line Interface entrypoint.
    • base: Python base abstract.
    • core: Starwhale core concepts which includes Dataset,Model,Runtime,Project, job and Evaluation, etc.
    • utils: Python utilities lib.
  • console: frontend with React + TypeScript.
  • server:Starwhale Controller with java, which includes all Starwhale Cloud Instance backend apis.
  • docker:Helm Charts, dockerfile.
  • docs:Starwhale官方文档。
  • example:Example code.
  • scripts:Bash and Python scripts for E2E testing and software releases, etc.

Fork and clone the repository

You will need to fork the code of Starwhale repository and clone it to your local machine.

  • Fork Starwhale repository: Fork Starwhale Github Repo,For more usage details, please refer to: Fork a repo

  • Install Git-LFS:Git-LFS

     git lfs install
  • Clone code to local machine

    git clone https://github.com/${your username}/starwhale.git

Development environment for Standalone Instance

Standalone Instance is written in Python3. When you want to modify swcli and sdk, you need to build the development environment.

Standalone development environment prerequisites

  • OS: Linux or macOS
  • Python: 3.7~3.11
  • Docker: >=19.03(optional)
  • Python isolated env tools: Python venv, virtualenv or conda, etc.

Building from source code

Based on the previous step, clone to the local directory: starwhale, and enter the client subdirectory:

cd starwhale/client

Create an isolated python environment with conda:

conda create -n starwhale-dev python=3.8 -y
conda activate starwhale-dev

Install client package and python dependencies into the starwhale-dev environment:

make install-sw
make install-dev-req

Validate with the swcli --version command. In the development environment, the version is 0.0.0.dev0:

❯ swcli --version
swcli, version 0.0.0.dev0

❯ swcli --version
/home/username/anaconda3/envs/starwhale-dev/bin/swcli

Modifying the code

When you modify the code, you do not need to install the python package (run the make install-sw command) again. .editorconfig will be picked up by most IDEs and code editors, which helps maintain consistent coding styles across multiple developers.

Lint and Test

Run unit test, E2E test, mypy lint, flake lint and isort check in the starwhale directory.

make client-all-check

Development environment for Cloud Instance

Cloud Instance is written in Java(backend) and React+TypeScript(frontend).

Development environment for Console

Development environment for Server

  • Language: Java
  • Build tool: Maven
  • Development framework: Spring Boot+Mybatis
  • Unit test framework: JUnit 5
    • Mockito used for mocking
    • Hamcrest used for assertion
    • Testcontainers used for providing lightweight, throwaway instances of common databases and Selenium web browsers that can run in a Docker container.
  • Check style tool: maven-checkstyle-plugin

Server development environment prerequisites

  • OS: Linux, macOS or Windows
  • Docker: >=19.03
  • JDK: >=11
  • Maven: >=3.8.1
  • Mysql: >=8.0.29
  • Minio
  • Kubernetes cluster/Minikube(If you don't have a k8s cluster, you can use Minikube as an alternative for development and debugging)

Modify the code and add unit tests

Now you can enter the corresponding module to modify and adjust the code on the server side. The main business code directory is src/main/java, and the unit test directory is src/test/java.

Execute code check and run unit tests

cd starwhale/server
mvn clean test

Deploy the server at local machine

  • Dependent services that need to be deployed

    • Minikube (optional; Minikube can be used when there is no k8s cluster — see the installation doc: Minikube)

      minikube start
      minikube addons enable ingress
      minikube addons enable ingress-dns
    • Mysql

      docker run --name sw-mysql -d \
      -p 3306:3306 \
      -e MYSQL_ROOT_PASSWORD=starwhale \
      -e MYSQL_USER=starwhale \
      -e MYSQL_PASSWORD=starwhale \
      -e MYSQL_DATABASE=starwhale \
      mysql:latest
    • Minio

      docker run --name minio -d \
      -p 9000:9000 --publish 9001:9001 \
      -e MINIO_DEFAULT_BUCKETS='starwhale' \
      -e MINIO_ROOT_USER="minioadmin" \
      -e MINIO_ROOT_PASSWORD="minioadmin" \
      bitnami/minio:latest
  • Package server program

    If you need to deploy the front-end at the same time when deploying the server, you can execute the build command of the front-end part first, and then execute 'mvn clean package', and the compiled front-end files will be automatically packaged.

    Use the following command to package the program

      cd starwhale/server
    mvn clean package
  • Specify the environment required for server startup

    # Minio env
    export SW_STORAGE_ENDPOINT=http://${Minio IP,default is:127.0.0.1}:9000
    export SW_STORAGE_BUCKET=${Minio bucket,default is:starwhale}
    export SW_STORAGE_ACCESSKEY=${Minio accessKey,default is:starwhale}
    export SW_STORAGE_SECRETKEY=${Minio secretKey,default is:starwhale}
    export SW_STORAGE_REGION=${Minio region,default is:local}
    # kubernetes env
    export KUBECONFIG=${the '.kube' file path}\.kube\config

    export SW_INSTANCE_URI=http://${Server IP}:8082
    export SW_METADATA_STORAGE_IP=${Mysql IP,default: 127.0.0.1}
    export SW_METADATA_STORAGE_PORT=${Mysql port,default: 3306}
    export SW_METADATA_STORAGE_DB=${Mysql dbname,default: starwhale}
    export SW_METADATA_STORAGE_USER=${Mysql user,default: starwhale}
    export SW_METADATA_STORAGE_PASSWORD=${user password,default: starwhale}
  • Deploy server service

    You can use the IDE or the command to deploy.

    java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
  • Debug

    There are two ways to debug the modified function:

    • Use swagger-ui for interface debugging, visit /swagger-ui/index.html to find the corresponding api
    • Debug the corresponding function directly in the ui (provided that the front-end code has been built in advance according to the instructions when packaging)
- + \ No newline at end of file diff --git a/0.5.10/concepts/index.html b/0.5.10/concepts/index.html index 2afb434f5..7a195103e 100644 --- a/0.5.10/concepts/index.html +++ b/0.5.10/concepts/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
- + \ No newline at end of file diff --git a/0.5.10/concepts/names/index.html b/0.5.10/concepts/names/index.html index 67aa69122..682e48647 100644 --- a/0.5.10/concepts/names/index.html +++ b/0.5.10/concepts/names/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

Names in Starwhale

Names mean project names, model names, dataset names, runtime names, and tag names.

Names Limitation

  • Names are case-insensitive.
  • A name MUST only consist of letters A-Z a-z, digits 0-9, the hyphen character -, the dot character ., and the underscore character _.
  • A name should always start with a letter or the _ character.
  • The maximum length of a name is 80.

Names uniqueness requirement

  • The resource name should be a unique string within its owner. For example, the project name should be unique in the owner instance, and the model name should be unique in the owner project.
  • The resource name can not be used by any other resource of the same kind in their owner, including those removed ones. For example, Project "apple" can not have two models named "Alice", even if one of them is already removed.
  • Different kinds of resources can have the same name. For example, a project and a model can be called "Alice" simultaneously.
  • Resources with different owners can have the same name. For example, a model in project "Apple" and a model in project "Banana" can have the same name "Alice".
  • Garbage-collected resources' names can be reused. For example, after the model with the name "Alice" in project "Apple" is removed and garbage collected, the project can have a new model with the same name "Alice".
- + \ No newline at end of file diff --git a/0.5.10/concepts/project/index.html b/0.5.10/concepts/project/index.html index dbe627b77..ce83ef250 100644 --- a/0.5.10/concepts/project/index.html +++ b/0.5.10/concepts/project/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

Project in Starwhale

"Project" is the basic unit for organizing different resources like models, datasets, etc. You may use projects for different purposes. For example, you can create a project for a data scientist team, a product line, or a specific model. Users usually work on one or more projects in their daily lives.

Starwhale Server/Cloud projects are grouped by accounts. Starwhale Standalone does not have accounts. So you will not see any account name prefix in Starwhale Standalone projects. Starwhale Server/Cloud projects can be either "public" or "private". Public projects means all users on the same instance are assigned a "guest" role to the project by default. For more information about roles, see Roles and permissions in Starwhale.

A self project is created automatically and configured as the default project in Starwhale Standalone.

- + \ No newline at end of file diff --git a/0.5.10/concepts/roles-permissions/index.html b/0.5.10/concepts/roles-permissions/index.html index 9d682f4ec..5c250d650 100644 --- a/0.5.10/concepts/roles-permissions/index.html +++ b/0.5.10/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

Roles and permissions in Starwhale

Roles are used to assign permissions to users. Only Starwhale Server/Cloud has roles and permissions, and Starwhale Standalone does not. The Administrator role is automatically created and assigned to the user "admin". Some sensitive operations can only be performed by users with the Administrator role, for example, creating accounts in Starwhale Server.

Projects have three roles:

  • Admin - Project administrators can read and write project data and assign project roles to users.
  • Maintainer - Project maintainers can read and write project data.
  • Guest - Project guests can only read project data.
ActionAdminMaintainerGuest
Manage project membersYes
Edit projectYesYes
View projectYesYesYes
Create evaluationsYesYes
Remove evaluationsYesYes
View evaluationsYesYesYes
Create datasetsYesYes
Update datasetsYesYes
Remove datasetsYesYes
View datasetsYesYesYes
Create modelsYesYes
Update modelsYesYes
Remove modelsYesYes
View modelsYesYesYes
Create runtimesYesYes
Update runtimesYesYes
Remove runtimesYesYes
View runtimesYesYesYes

The user who creates a project becomes the first project administrator. They can assign roles to other users later.

- + \ No newline at end of file diff --git a/0.5.10/concepts/versioning/index.html b/0.5.10/concepts/versioning/index.html index f76797865..fb1de8546 100644 --- a/0.5.10/concepts/versioning/index.html +++ b/0.5.10/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

Resource versioning in Starwhale

  • Starwhale manages the history of all models, datasets, and runtimes. Every update to a specific resource appends a new version of the history.
  • Versions are identified by a version id which is a random string generated automatically by Starwhale and are ordered by their creation time.
  • Versions can have tags. Starwhale uses version tags to provide a human-friendly representation of versions. By default, Starwhale attaches a default tag to each version. The default tag is the letter "v", followed by a number. For each versioned resource, the first version tag is always tagged with "v0", the second version is tagged with "v1", and so on. And there is a special tag "latest" that always points to the last version. When a version is removed, its default tag will not be reused. For example, there is a model with tags "v0, v1, v2". When "v2" is removed, tags will be "v0, v1". And the following tag will be "v3" instead of "v2" again. You can attach your own tags to any version and remove them at any time.
  • Starwhale uses a linear history model. There is neither branch nor cycle in history.
  • History can not be rollback. When a version is to be reverted, Starwhale clones the version and appends it as a new version to the end of the history. Versions in history can be manually removed and recovered.
- + \ No newline at end of file diff --git a/0.5.10/dataset/index.html b/0.5.10/dataset/index.html index c8d97a558..0d88b46d6 100644 --- a/0.5.10/dataset/index.html +++ b/0.5.10/dataset/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

Starwhale Dataset User Guide

Design Overview

Starwhale Dataset Positioning

The Starwhale Dataset contains three core stages: data construction, data loading, and data visualization. It is a data management tool for the ML/DL field. Starwhale Dataset can directly use the environment built by Starwhale Runtime, and can be seamlessly integrated with Starwhale Model and Starwhale Evaluation. It is an important part of the Starwhale MLOps toolchain.

According to the classification of MLOps Roles in Machine Learning Operations (MLOps): Overview, Definition, and Architecture, the three stages of Starwhale Dataset target the following user groups:

  • Data construction: Data Engineer, Data Scientist
  • Data loading: Data Scientist, ML Developer
  • Data visualization: Data Engineer, Data Scientist, ML Developer

mlops-users

Core Functions

  • Efficient loading: The original dataset files are stored in external storage such as OSS or NAS, and are loaded on demand without having to save to disk.
  • Simple construction: Supports one-click dataset construction from Image/Video/Audio directories, json files and Huggingface datasets, and also supports writing Python code to build completely custom datasets.
  • Versioning: Can perform version tracking, data append and other operations, and avoid duplicate data storage through the internally abstracted ObjectStore.
  • Sharing: Implement bidirectional dataset sharing between Standalone instances and Cloud/Server instances through the swcli dataset copy command.
  • Visualization: The web interface of Cloud/Server instances can present multi-dimensional, multi-type data visualization of datasets.
  • Artifact storage: The Standalone instance can store locally built or distributed swds series files, while the Cloud/Server instance uses object storage to provide centralized swds artifact storage.
  • Seamless Starwhale integration: Starwhale Dataset can use the runtime environment built by Starwhale Runtime to build datasets. Starwhale Evaluation and Starwhale Model can directly specify the dataset through the --dataset parameter to complete automatic data loading, which facilitates inference, model evaluation and other environments.

Key Elements

  • swds virtual package file: swds is different from swmp and swrt. It is not a single packaged file, but a virtual concept that specifically refers to a directory that contains dataset-related files for a version of the Starwhale dataset, including _manifest.yaml, dataset.yaml, dataset build Python scripts, and data file links, etc. You can use the swcli dataset info command to view where the swds is located. swds is the abbreviation of Starwhale Dataset.

swds-tree.png

  • swcli dataset command line: A set of dataset-related commands, including construction, distribution and management functions. See CLI Reference for details.
  • dataset.yaml configuration file: Describes the dataset construction process. It can be completely omitted and specified through swcli dataset build parameters. dataset.yaml can be considered as a configuration file representation of the swcli dataset build command line parameters. swcli dataset build parameters take precedence over dataset.yaml.
  • Dataset Python SDK: Includes data construction, data loading, and several predefined data types. See Python SDK for details.
  • Python scripts for dataset construction: A series of scripts written using the Starwhale Python SDK to build datasets.

Best Practices

The construction of Starwhale Dataset is performed independently. If third-party libraries need to be introduced when writing construction scripts, using Starwhale Runtime can simplify Python dependency management and ensure reproducible dataset construction. The Starwhale platform will build in as many open source datasets as possible for users to copy datasets for immediate use.

Command Line Grouping

The Starwhale Dataset command line can be divided into the following stages from the perspective of usage phases:

  • Construction phase
    • swcli dataset build
  • Visualization phase
    • swcli dataset diff
    • swcli dataset head
  • Distribution phase
    • swcli dataset copy
  • Basic management
    • swcli dataset tag
    • swcli dataset info
    • swcli dataset history
    • swcli dataset list
    • swcli dataset summary
    • swcli dataset remove
    • swcli dataset recover

Starwhale Dataset Viewer

Currently, the Web UI in the Cloud/Server instance can visually display the dataset. Currently, only DataTypes using the Python SDK can be correctly interpreted by the frontend, with mappings as follows:

  • Image: Display thumbnails, enlarged images, MASK type images, support image/png, image/jpeg, image/webp, image/svg+xml, image/gif, image/apng, image/avif formats.
  • Audio: Displayed as an audio wave graph, playable, supports audio/mp3 and audio/wav formats.
  • Video: Displayed as a video, playable, supports video/mp4, video/avi and video/webm formats.
  • GrayscaleImage: Display grayscale images, support x/grayscale format.
  • Text: Display text, support text/plain format, set encoding format, default is utf-8.
  • Binary and Bytes: Not supported for display currently.
  • Link: The above multimedia types all support specifying links as storage paths.

Starwhale Dataset Data Format

The dataset consists of multiple rows, each row being a sample, each sample containing several features. The features have a dict-like structure with some simple restrictions [L]:

  • The dict keys must be str type.
  • The dict values must be Python basic types like int/float/bool/str/bytes/dict/list/tuple, or Starwhale built-in data types.
  • For the same key across different samples, the value types do not need to stay the same.
  • If the value is a list or tuple, the element data types must be consistent.
  • For dict values, the restrictions are the same as [L].

Example:

{
"img": GrayscaleImage(
link=Link(
"123",
offset=32,
size=784,
_swds_bin_offset=0,
_swds_bin_size=8160,
)
),
"label": 0,
}

File Data Handling

Starwhale Dataset handles file type data in a special way. You can ignore this section if you don't care about Starwhale's implementation.

According to actual usage scenarios, Starwhale Dataset has two ways of handling file class data that is based on the base class starwhale.BaseArtifact:

  • swds-bin: Starwhale merges the data into several large files in its own binary format (swds-bin), which can efficiently perform indexing, slicing and loading.
  • remote-link: If the user's original data is stored in some external storage such as OSS or NAS, with a lot of original data that is inconvenient to move or has already been encapsulated by some internal dataset implementation, then you only need to use links in the data to establish indexes.

In the same Starwhale dataset, two types of data can be included simultaneously.

- + \ No newline at end of file diff --git a/0.5.10/dataset/yaml/index.html b/0.5.10/dataset/yaml/index.html index 79d31340a..2571939e9 100644 --- a/0.5.10/dataset/yaml/index.html +++ b/0.5.10/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
Skip to main content
Version: 0.5.10

The dataset.yaml Specification

tip

dataset.yaml is optional for the swcli dataset build command.

Building Starwhale Dataset uses dataset.yaml. Omitting dataset.yaml allows describing related configurations in swcli dataset build command line parameters. dataset.yaml can be considered as a file-based representation of the build command line configuration.

YAML Field Descriptions

FieldDescriptionRequiredTypeDefault
nameName of the Starwhale DatasetYesString
handlerImportable address of a class that inherits starwhale.SWDSBinBuildExecutor, starwhale.UserRawBuildExecutor or starwhale.BuildExecutor, or a function that returns a Generator or iterable object. Format is {module path}:{class name\|function name}YesString
descDataset descriptionNoString""
versiondataset.yaml format version, currently only "1.0" is supportedNoString1.0
attrDataset build parametersNoDict
attr.volume_sizeSize of each data file in the swds-bin dataset. Can be a number in bytes, or a number plus unit like 64M, 1GB etc.NoInt or Str64MB
attr.alignment_sizeData alignment size of each data block in the swds-bin dataset. If set to 4k, and a data block is 7.9K, 0.1K padding will be added to make the block size a multiple of alignment_size, improving page size and read efficiency.NoInteger or String128

Examples

Simplest Example

name: helloworld
handler: dataset:ExampleProcessExecutor

The helloworld dataset uses the ExampleProcessExecutor class in dataset.py of the dataset.yaml directory to build data.

MNIST Dataset Build Example

name: mnist
handler: mnist.dataset:DatasetProcessExecutor
desc: MNIST data and label test dataset
attr:
alignment_size: 128
volume_size: 4M

Example with handler as a generator function

dataset.yaml contents:

name: helloworld
handler: dataset:iter_item

dataset.py contents:

def iter_item():
for i in range(10):
yield {"img": f"image-{i}".encode(), "label": i}
- + \ No newline at end of file diff --git a/0.5.10/evaluation/heterogeneous/node-able/index.html b/0.5.10/evaluation/heterogeneous/node-able/index.html index dc0d3ad74..4a1c69a2b 100644 --- a/0.5.10/evaluation/heterogeneous/node-able/index.html +++ b/0.5.10/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -23,7 +23,7 @@ Refer to the link.

Take v0.13.0-rc.1 as an example:

kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

Note: This operation will run the NVIDIA device plugin plugin on all Kubernetes nodes. If configured before, it will be updated. Please evaluate the image version used carefully.

  • Confirm GPU can be discovered and used in the cluster. Refer to the command below. Check that nvidia.com/gpu is in the Capacity of the Jetson node. The GPU is then recognized normally by the Kubernetes cluster.

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • Build and Use Custom Images

    The l4t-jetpack image mentioned earlier can meet our general use. If we need to customize a more streamlined image or one with more features, we can make it based on l4t-base. Relevant Dockerfiles can refer to the image Starwhale made for mnist.

    - + \ No newline at end of file diff --git a/0.5.10/evaluation/heterogeneous/virtual-node/index.html b/0.5.10/evaluation/heterogeneous/virtual-node/index.html index 1bb52002e..392ac66f5 100644 --- a/0.5.10/evaluation/heterogeneous/virtual-node/index.html +++ b/0.5.10/evaluation/heterogeneous/virtual-node/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Virtual Kubelet as Kubernetes nodes

    Introduction

    Virtual Kubelet is an open source framework that can simulate a K8s node by mimicking the communication between kubelet and the K8s cluster.

    This solution is widely used by major cloud vendors for serverless container cluster solutions, such as Alibaba Cloud's ASK, Amazon's AWS Fargate, etc.

    Principles

    The virtual kubelet framework implements the related interfaces of kubelet for Node. With simple configuration, it can simulate a node.

    We only need to implement the PodLifecycleHandler interface to support:

    • Create, update, delete Pod
    • Get Pod status
    • Get Container logs

    Adding Devices to the Cluster

    If our device cannot serve as a K8s node due to resource constraints or other situations, we can manage these devices by using virtual kubelet to simulate a proxy node.

    The control flow between Starwhale Controller and the device is as follows:


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    Virtual kubelet converts the Pod orchestration information sent by Starwhale Controller into control behaviors for the device, such as executing a command via ssh on the device, or sending a message via USB or serial port.

    Below is an example of using virtual kubelet to control a device not joined to the cluster that is SSH-enabled:

    1. Prepare certificates
    • Create file vklet.csr with the following content:
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name

    [req_distinguished_name]

    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names

    [alt_names]
    IP = 1.2.3.4
    • Generate the certificate:
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • Submit the certificate:
    cat vklet.csr| base64 | tr -d "\n" # output as content of spec.request in csr.yaml

    csr.yaml:

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
    kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    Now we have vklet-cert.pem.

    • Compile virtual kubelet:
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    Create the node configuration file mock.json:

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    Start virtual kubelet:

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig
    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    Now we have simulated a node with 100 cores + 100GB memory using virtual kubelet.

    • Add PodLifecycleHandler implementation to convert important information in Pod orchestration into ssh command execution, and collect logs for Starwhale Controller to collect.

    See ssh executor for a concrete implementation.

    - + \ No newline at end of file diff --git a/0.5.10/evaluation/index.html b/0.5.10/evaluation/index.html index b07528bf0..27ed38604 100644 --- a/0.5.10/evaluation/index.html +++ b/0.5.10/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Model Evaluation

    Design Overview

    Starwhale Evaluation Positioning

    The goal of Starwhale Evaluation is to provide end-to-end management for model evaluation, including creating Jobs, distributing Tasks, viewing model evaluation reports and basic management. Starwhale Evaluation is a specific application of Starwhale Model, Starwhale Dataset, and Starwhale Runtime in the model evaluation scenario. Starwhale Evaluation is part of the MLOps toolchain built by Starwhale. More applications like Starwhale Model Serving, Starwhale Training will be included in the future.

    Core Features

    • Visualization: Both swcli and the Web UI provide visualization of model evaluation results, supporting comparison of multiple results. Users can also customize logging of intermediate processes.

    • Multi-scenario Adaptation: Whether it's a notebook, desktop or distributed cluster environment, the same commands, Python scripts, artifacts and operations can be used for model evaluation. This satisfies different computational power and data volume requirements.

    • Seamless Starwhale Integration: Leverage Starwhale Runtime for the runtime environment, Starwhale Dataset as data input, and run models from Starwhale Model. Configuration is simple whether using swcli, Python SDK or Cloud/Server instance Web UI.

    Key Elements

    • swcli model run: Command line for bulk offline model evaluation.
    • swcli model serve: Command line for online model evaluation.

    Best Practices

    Command Line Grouping

    From the perspective of completing an end-to-end Starwhale Evaluation workflow, commands can be grouped as:

    • Preparation Stage
      • swcli dataset build or Starwhale Dataset Python SDK
      • swcli model build or Starwhale Model Python SDK
      • swcli runtime build
    • Evaluation Stage
      • swcli model run
      • swcli model serve
    • Results Stage
      • swcli job info
    • Basic Management
      • swcli job list
      • swcli job remove
      • swcli job recover

    Abstraction job-step-task

    • job: A model evaluation task is a job, which contains one or more steps.

    • step: A step corresponds to a stage in the evaluation process. With the default PipelineHandler, steps are predict and evaluate. For custom evaluation processes using @handler, @evaluation.predict, @evaluation.evaluate decorators, steps are the decorated functions. Steps can have dependencies, forming a DAG. A step contains one or more tasks. Tasks in the same step have the same logic but different inputs. A common approach is to split the dataset into multiple parts, with each part passed to a task. Tasks can run in parallel.

    • task: A task is the final running entity. In Cloud/Server instances, a task is a container in a Pod. In Standalone instances, a task is a Python Thread.

    The job-step-task abstraction is the basis for implementing distributed runs in Starwhale Evaluation.

    - + \ No newline at end of file diff --git a/0.5.10/faq/index.html b/0.5.10/faq/index.html index 563bee100..c8cae9f9e 100644 --- a/0.5.10/faq/index.html +++ b/0.5.10/faq/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.10/getting-started/cloud/index.html b/0.5.10/getting-started/cloud/index.html index 91ce452be..18b25f639 100644 --- a/0.5.10/getting-started/cloud/index.html +++ b/0.5.10/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Getting started with Starwhale Cloud

    Starwhale Cloud is hosted on Aliyun with the domain name https://cloud.starwhale.cn. In the future, we will launch the service on AWS with the domain name https://cloud.starwhale.ai. It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.

    You need to install the Starwhale Client (swcli) at first.

    Sign Up for Starwhale Cloud and create your first project

    You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account.

    Then you can create a new project. In this tutorial, we will use the name demo for the project name.

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Login to the cloud instance

    swcli instance login --username <your account name> --password <your password> --alias swcloud https://cloud.starwhale.cn

    Copy the dataset, model, and runtime to the cloud instance

    swcli model copy mnist swcloud/project/<your account name>:demo
    swcli dataset copy mnist swcloud/project/<your account name>:demo
    swcli runtime copy pytorch swcloud/project/<your account name>:demo

    Run an evaluation with the web UI

    console-create-job.gif

    Congratulations! You have completed the Starwhale Cloud Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.5.10/getting-started/index.html b/0.5.10/getting-started/index.html index 1f9b00a28..9b24090b8 100644 --- a/0.5.10/getting-started/index.html +++ b/0.5.10/getting-started/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Getting started

    First, you need to install the Starwhale Client (swcli), which can be done by running the following command:

    python3 -m pip install starwhale

    For more information, see the swcli installation guide.

    Depending on your instance type, there are three getting-started guides available for you:

    • Getting started with Starwhale Standalone - This guide helps you run an MNIST evaluation on your desktop PC/laptop. It is the fastest and simplest way to get started with Starwhale.
    • Getting started with Starwhale Server - This guide helps you install Starwhale Server in your private data center and run an MNIST evaluation. At the end of the tutorial, you will have a Starwhale Server instance where you can run model evaluations on and manage your datasets and models.
    • Getting started with Starwhale Cloud - This guide helps you create an account on Starwhale Cloud and run an MNIST evaluation. It is the easiest way to experience all Starwhale features.
    - + \ No newline at end of file diff --git a/0.5.10/getting-started/runtime/index.html b/0.5.10/getting-started/runtime/index.html index fc6f3cac0..d9d5b4293 100644 --- a/0.5.10/getting-started/runtime/index.html +++ b/0.5.10/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Getting Started with Starwhale Runtime

    This article demonstrates how to build a Starwhale Runtime of the Pytorch environment and how to use it. This runtime can meet the dependency requirements of the six examples in Starwhale: mnist, speech commands, nmt, cifar10, ag_news, and PennFudan. Links to relevant code: example/runtime/pytorch.

    You can learn the following things from this tutorial:

    • How to build a Starwhale Runtime.
    • How to use a Starwhale Runtime in different scenarios.
    • How to release a Starwhale Runtime.

    Prerequisites

    Run the following command to clone the example code:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch # for users in the mainland of China, use pytorch-cn-mirror instead.

    Build Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    Use Starwhale Runtime in the standalone instance

    Use Starwhale Runtime in the shell

    # Activate the runtime
    swcli runtime activate pytorch

    swcli runtime activate will download all python dependencies of the runtime, which may take a long time.

    All dependencies are ready in your python environment when the runtime is activated. It is similar to source venv/bin/activate of virtualenv or the conda activate command of conda. If you close the shell or switch to another shell, you need to reactivate the runtime.

    Use Starwhale Runtime in swcli

    # Use the runtime when building a Starwhale Model
    swcli model build . --runtime pytorch
    # Use the runtime when building a Starwhale Dataset
    swcli dataset build --yaml /path/to/dataset.yaml --runtime pytorch
    # Run a model evaluation with the runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch

    Copy Starwhale Runtime to another instance

    You can copy the runtime to a server/cloud instance, which can then be used in the server/cloud instance or downloaded by other users.

    # Copy the runtime to a server instance named 'pre-k8s'
    ❯ swcli runtime copy pytorch cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/0.5.10/getting-started/server/index.html b/0.5.10/getting-started/server/index.html index 3f0ab9d5e..90cdd85a9 100644 --- a/0.5.10/getting-started/server/index.html +++ b/0.5.10/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Getting started with Starwhale Server

    Install Starwhale Server

    To install Starwhale Server, see the installation guide.

    Create your first project

    Login to the server

    Open your browser and enter your server's URL in the address bar. Log in with your username (starwhale) and password (abcd1234).

    console-artifacts.gif

    Create a new project

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Copy the dataset, the model, and the runtime to the server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    Use the Web UI to run an evaluation

    Navigate to the "demo" project in your browser and create a new evaluation job.

    console-create-job.gif

    Congratulations! You have completed the Starwhale Server Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.5.10/getting-started/standalone/index.html b/0.5.10/getting-started/standalone/index.html index e043cd2a8..49874874e 100644 --- a/0.5.10/getting-started/standalone/index.html +++ b/0.5.10/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Getting started with Starwhale Standalone

    When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.

    We also provide a Jupyter Notebook example, you can try it in Google Colab or in your local vscode/jupyterlab.

    Downloading Examples

    Download Starwhale examples by cloning the Starwhale project via:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    To save time in the example downloading, we skip git-lfs and other commit info. We will use ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory.

    Core Workflow

    Building a Pytorch Runtime

    Runtime example codes are in the example/runtime/pytorch directory.

    • Build the Starwhale runtime bundle:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      tip

      When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. Using the befitting pypi mirror and cache config in the ~/.pip/pip.conf file is a recommended practice.

      For users in the mainland of China, the following conf file is an option:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • Check your local Starwhale Runtime:

      swcli runtime list
      swcli runtime info pytorch

    Building a Model

    Model example codes are in the example/mnist directory.

    • Download the pre-trained model file:

      cd example/mnist
      make download-model
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-model
      cd -
    • Build a Starwhale model:

      swcli model build example/mnist --runtime pytorch
    • Check your local Starwhale models:

      swcli model list
      swcli model info mnist

    Building a Dataset

    Dataset example codes are in the example/mnist directory.

    • Download the MNIST raw data:

      cd example/mnist
      make download-data
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-data
      cd -
    • Build a Starwhale dataset:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • Check your local Starwhale dataset:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    Running an Evaluation Job

    • Create an evaluation job:

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • Check the evaluation result

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    Congratulations! You have completed the Starwhale Standalone Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.5.10/index.html b/0.5.10/index.html index 76b5cdc86..796d3e298 100644 --- a/0.5.10/index.html +++ b/0.5.10/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    What is Starwhale

    Overview

    Starwhale is an MLOps/LLMOps platform that makes your model creation, evaluation, and publication much easier. It aims to create a handy tool for data scientists and machine learning engineers.

    Starwhale helps you:

    • Keep track of your training/testing dataset history including data items and their labels, so that you can easily access them.
    • Manage your model packages that you can share across your team.
    • Run your models in different environments, either on an NVIDIA GPU server or on an embedded device like Raspberry Pi.
    • Create an online service with an interactive Web UI for your models.

    Starwhale is designed to be an open platform. You can create your own plugins to meet your requirements.

    Deployment options

    Each deployment of Starwhale is called an instance. All instances can be managed by the Starwhale Client (swcli).

    You can start using Starwhale with one of the following instance types:

    • Starwhale Standalone - Rather than a running service, Starwhale Standalone is actually a repository that resides in your local file system. It is created and managed by the Starwhale Client (swcli). You only need to install swcli to use it. Currently, each user on a single machine can have only ONE Starwhale Standalone instance. We recommend you use the Starwhale Standalone to build and test your datasets, runtime, and models before pushing them to Starwhale Server/Cloud instances.
    • Starwhale Server - Starwhale Server is a service deployed on your local server. Besides text-only results from the Starwhale Client (swcli), Starwhale Server provides Web UI for you to manage your datasets and models, evaluate your models in your local Kubernetes cluster, and review the evaluation results.
    • Starwhale Cloud - Starwhale Cloud is a managed service hosted on public clouds. By registering an account on https://cloud.starwhale.cn, you are ready to use Starwhale without needing to install, operate, and maintain your own instances. Starwhale Cloud also provides public resources for you to download, like datasets, runtimes, and models. Check the "starwhale/public" project on Starwhale Cloud for more details.

    When choosing which instance type to use, consider the following:

    Instance TypeDeployment locationMaintained byUser InterfaceScalability
    Starwhale StandaloneYour laptop or any server in your data centerNot requiredCommand lineNot scalable
    Starwhale ServerYour data centerYourselfWeb UI and command lineScalable, depends on your Kubernetes cluster
    Starwhale CloudPublic cloud, like AWS or Aliyunthe Starwhale TeamWeb UI and command lineScalable, but currently limited by the freely available resource on the cloud
    - + \ No newline at end of file diff --git a/0.5.10/model/index.html b/0.5.10/model/index.html index 4ab98609e..52e15c02f 100644 --- a/0.5.10/model/index.html +++ b/0.5.10/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Model

    A Starwhale Model is a standard format for packaging machine learning models that can be used for various purposes, like model fine-tuning, model evaluation, and online serving. A Starwhale Model contains the model file, inference codes, configuration files, and any other files required to run the model.

    Create a Starwhale Model

    There are two ways to create a Starwhale Model: by swcli or by Python SDK.

    Create a Starwhale Model by swcli

    To create a Starwhale Model by swcli, you need to define a model.yaml, which describes some required information about the model package, and run the following command:

    swcli model build . --model-yaml /path/to/model.yaml

    For more information about the command and model.yaml, see the swcli reference. model.yaml is optional for model building.

    Create a Starwhale Model by Python SDK

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    Model Management

    Model Management by swcli

    CommandDescription
    swcli model listList all Starwhale Models in a project
    swcli model infoShow detail information about a Starwhale Model
    swcli model copyCopy a Starwhale Model to another location
    swcli model removeRemove a Starwhale Model
    swcli model recoverRecover a previously removed Starwhale Model

    Model Management by WebUI

    Model History

    Starwhale Models are versioned. The general rules about versions are described in Resource versioning in Starwhale.

    Model History Management by swcli

    CommandDescription
    swcli model historyList all versions of a Starwhale Model
    swcli model infoShow detail information about a Starwhale Model version
    swcli model diffCompare two versions of a Starwhale model
    swcli model copyCopy a Starwhale Model version to a new one
    swcli model removeRemove a Starwhale Model version
    swcli model recoverRecover a previously removed Starwhale Model version

    Model Evaluation

    Model Evaluation by swcli

    CommandDescription
    swcli model runCreate an evaluation with a Starwhale Model

    The Storage Format

    The Starwhale Model is a tarball file that contains the source directory.

    - + \ No newline at end of file diff --git a/0.5.10/model/yaml/index.html b/0.5.10/model/yaml/index.html index ff15eb713..9f0ec26fa 100644 --- a/0.5.10/model/yaml/index.html +++ b/0.5.10/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    The model.yaml Specification

    tip

    model.yaml is optional for swcli model build.

    When building a Starwhale Model using the swcli model build command, you can specify a yaml file that follows a specific format via the --model-yaml parameter to simplify specifying build parameters.

    Even without specifying the --model-yaml parameter, swcli model build will automatically look for a model.yaml file under the ${workdir} directory and extract parameters from it. Parameters specified on the swcli model build command line take precedence over equivalent configurations in model.yaml, so you can think of model.yaml as a file-based representation of the build command line.

    When building a Starwhale Model using the Python SDK, the model.yaml file does not take effect.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale Model, equivalent to --name parameter.NoString
    run.modulesPython Modules searched during model build, can specify multiple entry points for model execution, format is Python Importable path. Equivalent to --module parameter.YesList[String]
    run.handlerDeprecated alias of run.modules, can only specify one entry point.NoString
    versionmodel.yaml format version, currently only supports "1.0"NoString1.0
    descModel description, equivalent to --desc parameter.NoString

    Example


    name: helloworld

    run:
    modules:
    - src.evaluator

    desc: "example yaml"

    A Starwhale model named helloworld, searches for functions decorated with @evaluation.predict, @evaluation.evaluate or @handler, or classes inheriting from PipelineHandler in src/evaluator.py under ${WORKDIR} of the swcli model build command. These functions or classes will be added to the list of runnable entry points for the Starwhale model. When running the model via swcli model run or Web UI, select the corresponding entry point (handler) to run.

    model.yaml is optional, parameters defined in yaml can also be specified via swcli command line parameters.


    swcli model build . --model-yaml model.yaml

    Is equivalent to:


    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"

    - + \ No newline at end of file diff --git a/0.5.10/reference/sdk/dataset/index.html b/0.5.10/reference/sdk/dataset/index.html index 6e982b973..98dbb256d 100644 --- a/0.5.10/reference/sdk/dataset/index.html +++ b/0.5.10/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Dataset SDK

    dataset

    Get starwhale.Dataset object, by creating new datasets or loading existing datasets.

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    Parameters

    • uri: (str or Resource, required)
      • The dataset uri or Resource object.
    • create: (str, optional)
      • The mode of dataset creating. The options are auto, empty and forbid.
        • auto mode: If the dataset already exists, creation is ignored. If it does not exist, the dataset is created automatically.
        • empty mode: If the dataset already exists, an Exception is raised; If it does not exist, an empty dataset is created. This mode ensures the creation of a new, empty dataset.
        • forbid mode: If the dataset already exists, nothing is done. If it does not exist, an Exception is raised. This mode ensures the existence of the dataset.
      • The default is auto.
    • readonly: (bool, optional)
      • For an existing dataset, you can specify the readonly=True argument to ensure the dataset is in readonly mode.
      • Default is False.

    Examples

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # returns False, the "mnist" dataset does not exist yet.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset implements the abstraction of a Starwhale dataset, and can operate on datasets in Standalone/Server/Cloud instances.

    from_huggingface

    from_huggingface is a classmethod that can convert a Huggingface dataset into a Starwhale dataset.

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • dataset name.
    • repo: (str, required)
    • subset: (str, optional)
      • The subset name. If the huggingface dataset has multiple subsets, you must specify the subset name.
    • split: (str, optional)
      • The split name. If the split name is not specified, a dataset containing all splits will be built.
    • revision: (str, optional)
      • The huggingface datasets revision. The default value is main.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • cache: (bool, optional)
      • Whether to use the huggingface dataset cache (download + local hf dataset).
      • The default value is True.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json is a classmethod that can convert a json text into a Starwhale dataset.

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • Dataset name.
    • json_text: (str, required)
      • A json string. The from_json function deserializes this string into Python objects to start building the Starwhale dataset.
    • field_selector: (str, optional)
      • The field from which you would like to extract dataset array items.
      • The default value is "" which indicates that the json object is an array containing all the items.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder is a classmethod that can read Image/Video/Audio data from a specified directory and automatically convert them into a Starwhale dataset. This function supports the following features:

    • It can recursively search the target directory and its subdirectories
    • Supports extracting three types of files:
      • image: Supports png/jpg/jpeg/webp/svg/apng image types. Image files will be converted to Starwhale.Image type.
      • video: Supports mp4/webm/avi video types. Video files will be converted to Starwhale.Video type.
      • audio: Supports mp3/wav audio types. Audio files will be converted to Starwhale.Audio type.
    • Each file corresponds to one record in the dataset, with the file stored in the file field.
    • If auto_label=True, the parent directory name will be used as the label for that record, stored in the label field. Files in the root directory will not be labeled.
    • If a txt file with the same name as an image/video/audio file exists, its content will be stored as the caption field in the dataset.
    • If metadata.csv or metadata.jsonl exists in the root directory, their content will be read automatically and associated to records by file path as meta information in the dataset.
      • metadata.csv and metadata.jsonl are mutually exclusive. An exception will be thrown if both exist.
      • Each record in metadata.csv and metadata.jsonl must contain a file_name field pointing to the file path.
      • metadata.csv and metadata.jsonl are optional for dataset building.
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • folder: (str|Path, required)
      • The folder path from which you would like to create this dataset.
    • kind: (str|DatasetFolderSourceType, required)
      • The dataset source type you would like to use, the choices are: image, video and audio.
      • Recursively searching for files of the specified kind in folder. Other file types will be ignored.
    • name: (str|Resource, optional)
      • The dataset name you would like to use.
      • If not specified, the name is the folder name.
    • auto_label: (bool, optional)
      • Whether to auto label by the sub-folder name.
      • The default value is True.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples ${folder-example}

    • Example for the normal function calling

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • Example for caption

      folder/dog/1.png
      folder/dog/1.txt

      1.txt content will be used as the caption of 1.png.

    • Example for metadata

      metadata.csv:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • Example for auto-labeling

      The following structure will create a dataset with 2 labels: "cat" and "dog", 4 images in total.

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

    __iter__

    __iter__ a method that iter the dataset rows.

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label and img are the features of mnist.
    print(item.features.img)

    batch_iter

    batch_iter is a method that iter the dataset rows in batch.

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    Parameters

    • batch_size: (int, optional)
      • batch size. The default value is 1.
    • drop_not_full: (bool, optional)
      • Whether the last batch of data, with a size smaller than batch_size, will be discarded.
      • The default value is False.

    Examples

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ is a method that allows retrieving certain rows of data from the dataset, with usage similar to Python dict and list types.

    from starwhale import dataset

    ds = dataset("mock-int-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-str-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ is a method that allows updating rows of data in the dataset, with usage similar to Python dicts. __setitem__ supports multi-threaded parallel data insertion.

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    Parameters

    • key: (int|str, required)
      • key is the index for each row in the dataset. The type is int or str, but a dataset only accepts one type.
    • value: (DataRow|tuple|dict, required)
      • value is the features for each row in the dataset, using a Python dict is generally recommended.

    Examples

    • Normal insertion

    Insert two rows into the test dataset, with index test and test2 respectively:

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • Parallel insertion
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]
    # wait for all append tasks to finish before committing
    for task in as_completed(tasks):
    task.result()

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ is a method to delete certain rows of data from the dataset.

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append is a method to append data to a dataset, similar to the append method for Python lists.

    • Adding features dict, each row is automatically indexed with int starting from 0 and incrementing.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • By appending the index and features dictionary, the index of each data row in the dataset will not be handled automatically.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend is a method to bulk append data to a dataset, similar to the extend method for Python lists.

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit is a method that flushes the current cached data to storage when called, and generates a dataset version. This version can then be used to load the corresponding dataset content afterwards.

    For a dataset, if some data is added without calling commit, but close is called or the process exits directly instead, the data will still be written to the dataset, just without generating a new version.

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    Parameters

    • tags: (list(str), optional)
      • tag as a list
    • message: (str, optional)
      • commit message. The default value is empty.
    • force_add_tags: (bool, optional)
      • For server/cloud instances, when adding labels to this version, if a label has already been applied to other dataset versions, you can use the force_add_tags=True parameter to forcibly add the label to this version, otherwise an exception will be thrown.
      • The default is False.
    • ignore_add_tags_errors: (bool, optional)
      • Ignore any exceptions thrown when adding labels.
      • The default is False.

    Examples

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly is a property attribute indicating if the dataset is read-only, it returns a bool value.

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version is a property attribute, string type.

    • When loading an existing dataset, the loading_version is the related dataset version.
    • When creating a non-existed dataset, the loading_version is equal to the pending_commit_version.

    pending_commit_version

    pending_commit_version is a property attribute, string type. When you call the commit function, the pending_commit_version will be recorded in the Standalone instance, Server instance, or Cloud instance.

    committed_version

    committed_version is a property attribute, string type. After the commit function is called, the committed_version will come out, it is equal to the pending_commit_version. Accessing this attribute without calling commit first will raise an exception.

    remove

    remove is a method equivalent to the swcli dataset remove command, it can delete a dataset.

    def remove(self, force: bool = False) -> None:

    recover

    recover is a method equivalent to the swcli dataset recover command, it can recover a soft-deleted dataset that has not yet been garbage collected.

    def recover(self, force: bool = False) -> None:

    summary

    summary is a method equivalent to the swcli dataset summary command, it returns summary information of the dataset.

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history is a method equivalent to the swcli dataset history command, it returns the history records of the dataset.

    def history(self) -> t.List[t.Dict]:

    flush

    flush is a method that flushes temporarily cached data from memory to persistent storage. The commit and close methods will automatically call flush.

    close

    close is a method that closes opened connections related to the dataset. Dataset also implements contextmanager, so datasets can be automatically closed using with syntax without needing to explicitly call close.

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head is a method to show the first n rows of a dataset, equivalent to the swcli dataset head command.

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> List[DataRow]:

    fetch_one

    fetch_one is a method to get the first record in a dataset, similar to head(n=1)[0].

    list

    list is a class method to list Starwhale datasets under a project URI, equivalent to the swcli dataset list command.

    @classmethod
    def list(
    cls,
    project_uri: Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:

    copy

    copy is a method to copy a dataset to another instance, equivalent to the swcli dataset copy command.

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • When copying a remote dataset to a local instance, this parameter can be set to specify the destination Project URI.
    • force: (bool, optional)
      • Whether to forcibly overwrite the dataset if there is already one with the same version on the target instance.
      • The default value is False.
      • When the tags are already used for the other dataset version in the dest instance, you should use force option or adjust the tags.
    • mode: (str, optional)
      • Dataset copy mode, default is 'patch'. Mode choices are: 'patch', 'overwrite'.
      • patch: Patch mode, only update the changed rows and columns for the remote dataset.
      • overwrite: Overwrite mode, update records and delete extraneous rows from the remote dataset.
    • ignore_tags (List[str], optional)
      • Ignore tags when copying.
      • By default, the dataset is copied with all user custom tags.
      • latest and ^v\d+$ are the system builtin tags, they are ignored automatically.

    Examples

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch is a method that can convert a Starwhale dataset to a Pytorch torch.utils.data.Dataset, which can then be passed to torch.utils.data.DataLoader for use.

    It should be noted that the to_pytorch function returns a Pytorch IterableDataset.

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    Parameters

    • transform: (callable, optional)
      • A transform function for input data.
    • drop_index: (bool, optional)
      • Whether to drop the index column.
    • skip_default_transform: (bool, optional)
      • If transform is not set, by default the built-in Starwhale transform function will be used to transform the data. This can be disabled with the skip_default_transform parameter.

    Examples

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow is a method that can convert a Starwhale dataset to a Tensorflow tensorflow.data.Dataset.

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    Parameters

    • drop_index: (bool, optional)
      • Whether to drop the index column.

    Examples

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config is a method to set blob-related attributes in a Starwhale dataset. It needs to be called before making data changes.

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    Parameters

    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.

    Examples

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config is a method to set parameters for the Starwhale dataset loader process.

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    Parameters

    • num_workers: (int, optional)
      • The workers number for loading dataset.
      • The default value is 2.
    • cache_size: (int, optional)
      • Prefetched data rows.
      • The default value is 20.
    • field_transformer: (dict, optional)
      • features name transform dict.

    Examples

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/0.5.10/reference/sdk/evaluation/index.html b/0.5.10/reference/sdk/evaluation/index.html index 2106d4eda..6360be544 100644 --- a/0.5.10/reference/sdk/evaluation/index.html +++ b/0.5.10/reference/sdk/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Model Evaluation SDK

    @evaluation.predict

    The @evaluation.predict decorator defines the inference process in the Starwhale Model Evaluation, similar to the map phase in MapReduce. It contains the following core features:

    • On the Server instance, require the resources needed to run.
    • Automatically read the local or remote datasets, and pass the data in the datasets one by one or in batches to the function decorated by evaluation.predict.
    • By the replicas setting, implement distributed dataset consumption to horizontally scale and shorten the time required for the model evaluation tasks.
    • Automatically store the return values of the function and the input features of the dataset into the results table, for display in the Web UI and further use in the evaluate phase.
    • The decorated function is called once for each single piece of data or each batch, to complete the inference process.

    Parameters

    • resources: (dict, optional)
      • Defines the resources required by each predict task when running on the Server instance, including mem, cpu, and nvidia.com/gpu.
      • mem: The unit is Bytes, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"mem": {"request": 100 * 1024, "limit": 200 * 1024}}.
        • If only a single number is set, the Python SDK will automatically set request and limit to the same value, e.g. resources={"mem": 100 * 1024} is equivalent to resources={"mem": {"request": 100 * 1024, "limit": 100 * 1024}}.
      • cpu: The unit is the number of CPU cores, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"cpu": {"request": 1, "limit": 2}}.
        • If only a single number is set, the SDK will automatically set request and limit to the same value, e.g. resources={"cpu": 1.5} is equivalent to resources={"cpu": {"request": 1.5, "limit": 1.5}}.
      • nvidia.com/gpu: The unit is the number of GPUs, int type is supported.
        • nvidia.com/gpu does not support setting request and limit, only a single number is supported.
      • Note: The resources parameter currently only takes effect on the Server instances. For the Cloud instances, the same can be achieved by selecting the corresponding resource pool when submitting the evaluation task. Standalone instances do not support this feature at all.
    • replicas: (int, optional)
      • The number of replicas to run predict.
      • predict defines a Step, in which there are multiple equivalent Tasks. Each Task runs on a Pod in Cloud/Server instances, and a Thread in Standalone instances.
      • When multiple replicas are specified, they are equivalent and will jointly consume the selected dataset to achieve distributed dataset consumption. It can be understood that a row in the dataset will only be read by one predict replica.
      • The default is 1.
    • batch_size: (int, optional)
      • Batch size for passing data from the dataset into the function.
      • The default is 1.
    • fail_on_error: (bool, optional)
      • Whether to interrupt the entire model evaluation when the decorated function throws an exception. If you expect some "exceptional" data to cause evaluation failures but don't want to interrupt the overall evaluation, you can set fail_on_error=False.
      • The default is True.
    • auto_log: (bool, optional)
      • Whether to automatically log the return values of the function and the input features of the dataset to the results table.
      • The default is True.
    • log_mode: (str, optional)
      • When auto_log=True, you can set log_mode to define logging the return values in plain or pickle format.
      • The default is pickle.
    • log_dataset_features: (List[str], optional)
      • When auto_log=True, you can selectively log certain features from the dataset via this parameter.
      • By default, all features will be logged.
    • needs: (List[Callable], optional)
      • Defines the prerequisites for this task to run, can use the needs syntax to implement DAG.
      • needs accepts functions decorated by @evaluation.predict, @evaluation.evaluate, and @handler.
      • The default is empty, i.e. does not depend on any other tasks.

    Input

    The decorated functions need to define some input parameters to accept dataset data, etc. They contain the following patterns:

    • data:

      • data is a dict type that can read the features of the dataset.
      • When batch_size=1 or batch_size is not set, the label feature can be read through data['label'] or data.label.
      • When batch_size is set to > 1, data is a list.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external:

      • data is a dict type that can read the features of the dataset.
      • external is also a dict, including: index, index_with_dataset, dataset_info, context and dataset_uri keys. The attributes can be used for the further fine-grained processing.
        • index: The index of the dataset row.
        • index_with_dataset: The index with the dataset info.
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo Class.
        • context: starwhale.Context Class.
        • dataset_uri: starwhale.base.uri.resource.Resource Class.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw:

      • data is a dict type that can read the features of the dataset.
      • kw is a dict that contains external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs:

      • The first argument of args list is data.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args:

      • *args does not contain external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    Examples

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "mem": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate is a decorator that defines the evaluation process in the Starwhale Model evaluation, similar to the reduce phase in MapReduce. It contains the following core features:

    • On the Server instance, apply for the resources.
    • Read the data recorded in the results table automatically during the predict phase, and pass it into the function as an iterator.
    • The evaluate phase will only run one replica, and cannot define the replicas parameter like the predict phase.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
      • In the common case, it will depend on a function decorated by @evaluation.predict.
    • use_predict_auto_log: (bool, optional)
      • Defaults to True, passes an iterator that can traverse the predict results to the function.

    Input

    • When use_predict_auto_log=True (default), pass an iterator that can traverse the predict results into the function.
      • The iterated object is a dictionary containing two keys: output and input.
        • output is the element returned by the predict stage function.
        • input is the features of the corresponding dataset during the inference process, which is a dictionary type.
    • When use_predict_auto_log=False, do not pass any parameters into the function.

    Examples

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    evaluation.log

    evaluation.log is a function that logs evaluation metrics to specific tables, which can be viewed on the Web page in the Server/Cloud instance.

    Parameters

    • category: (str, required)
      • The category of the logged record, which will be used as a suffix for the Starwhale Datastore table name.
      • Each category corresponds to a Starwhale Datastore table, with these tables isolated by evaluation task ID without affecting each other.
    • id: (str|int, required)
      • The ID of the logged record, unique within the table.
      • Only one type, either str or int, can be used as ID type in the same table.
    • metrics: (dict, required)
      • A dictionary recording metrics in key-value pairs.

    Examples

    from starwhale import evaluation

    evaluation.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation.log("ppl", "1", {"a": "test", "b": 1})

    evaluation.log_summary

    evaluation.log_summary is a function that logs metrics to the summary table. The evaluation page of a Server/Cloud instance displays data from the summary table.

    Each time it is called, Starwhale automatically updates the table using the unique ID of the current evaluation as the row ID. This function can be called multiple times during an evaluation to update different columns.

    Each project has one summary table, and all evaluation jobs under that project will log their summary information into this table.

    @classmethod
    def log_summary(cls, *args: t.Any, **kw: t.Any) -> None:

    Examples

    from starwhale import evaluation

    evaluation.log_summary(loss=0.99)
    evaluation.log_summary(loss=0.99, accuracy=0.99)
    evaluation.log_summary({"loss": 0.99, "accuracy": 0.99})

    evaluation.iter

    evaluation.iter is a function that returns an iterator for reading data iteratively from certain model evaluation tables.

    @classmethod
    def iter(cls, category: str) -> t.Iterator:

    Parameters

    • category: (str, required)
      • This parameter is consistent with the meaning of the category parameter in the evaluation.log function.

    Examples

    from starwhale import evaluation

    results = [data for data in evaluation.iter("label/0")]

    @handler

    @handler is a decorator that provides the following functionalities:

    • On a Server instance, it requests the required resources to run.
    • It can control the number of replicas.
    • Multiple handlers can form a DAG through dependency relationships to control the execution workflow.
    • It can expose ports externally to run like a web handler.

    @fine_tune, @evaluation.predict and @evaluation.evaluate can be considered applications of @handler in certain specific areas. @handler is the underlying implementation of these decorators and is more fundamental and flexible.

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
    • replicas: (int, optional)
      • Consistent with the replicas parameter definition in @evaluation.predict.
    • name: (str, optional)
      • The name displayed for the handler.
      • If not specified, use the decorated function's name.
    • expose: (int, optional)
      • The port exposed externally. When running a web handler, the exposed port needs to be declared.
      • The default is 0, meaning no port is exposed.
      • Currently only one port can be exposed.
    • require_dataset: (bool, optional)
      • Defines whether this handler requires a dataset when running.
      • If require_dataset=True, the user is required to input a dataset when creating an evaluation task on the Server/Cloud instance web page. If require_dataset=False, the user does not need to specify a dataset on the web page.
      • The default is False.

    Examples

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune is a decorator that defines the fine-tuning process for model training.

    Some restrictions and usage suggestions:

    • fine_tune has only one replica.
    • fine_tune requires dataset input.
    • Generally, the dataset is obtained through Context.get_runtime_context() at the start of fine_tune.
    • Generally, at the end of fine_tune, the fine-tuned Starwhale model package is generated through starwhale.model.build, which will be automatically copied to the corresponding evaluation project.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.

    Examples

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    The @multi_classification decorator uses the sklearn lib to analyze results for multi-classification problems, outputting the confusion matrix, ROC, AUC etc., and writing them to related tables in the Starwhale Datastore.

    When using it, certain requirements are placed on the return value of the decorated function, which should be (label, result) or (label, result, probability_matrix).

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    Parameters

    • confusion_matrix_normalize: (str, optional)
      • Accepts three parameters:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • Whether to calculate the Hamming loss.
      • The default is True.
    • show_cohen_kappa_score: (bool, optional)
      • Whether to calculate the Cohen kappa score.
      • The default is True.
    • show_roc_auc: (bool, optional)
      • Whether to calculate ROC/AUC. To calculate, the function needs to return a (label, result, probability_matrix) tuple, otherwise a (label, result) tuple is sufficient.
      • The default is True.
    • all_labels: (List, optional)
      • Defines all the labels.

    Examples


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    The PipelineHandler class provides a default model evaluation workflow definition that requires users to implement the predict and evaluate functions.

    The PipelineHandler is equivalent to using the @evaluation.predict and @evaluation.evaluate decorators together - the usage looks different but the underlying model evaluation process is the same.

    Note that PipelineHandler currently does not support defining resources parameters.

    Users need to implement the following functions:

    • predict: Defines the inference process, equivalent to a function decorated with @evaluation.predict.

    • evaluate: Defines the evaluation process, equivalent to a function decorated with @evaluation.evaluate.

    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    Parameters

    • predict_batch_size: (int, optional)
      • Equivalent to the batch_size parameter in @evaluation.predict.
      • Default is 1.
    • ignore_error: (bool, optional)
      • Equivalent to the fail_on_error parameter in @evaluation.predict.
      • Default is False.
    • predict_auto_log: (bool, optional)
      • Equivalent to the auto_log parameter in @evaluation.predict.
      • Default is True.
    • predict_log_mode: (str, optional)
      • Equivalent to the log_mode parameter in @evaluation.predict.
      • Default is pickle.
    • predict_log_dataset_features: (List[str], optional)
      • Equivalent to the log_dataset_features parameter in @evaluation.predict.
      • Default is None, which records all features.

    Examples

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    The context information passed during model evaluation, including Project, Task ID, etc. The Context content is automatically injected and can be used in the following ways:

    • Inherit the PipelineHandler class and use the self.context object.
    • Get it through Context.get_runtime_context().

    Note that Context can only be used during model evaluation, otherwise the program will throw an exception.

    Currently Context can get the following values:

    • project: str
      • Project name.
    • version: str
      • Unique ID of model evaluation.
    • step: str
      • Step name.
    • total: int
      • Total number of Tasks under the Step.
    • index: int
      • Task index number, starting from 0.
    • dataset_uris: List[str]
      • List of Starwhale dataset URIs.

    Examples


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api is a decorator that provides a simple Web Handler input definition based on Gradio for accepting external requests and returning inference results to the user when launching a Web Service with the swcli model serve command, enabling online evaluation.

    Examples

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    If you want to customize the web service implementation, you can subclass Service and override the serve method.

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    Notes:

    • Handlers added with PipelineHandler.add_api and the api decorator or Service.api can work together
    • If using a custom Service, you need to instantiate the custom Service class in the model

    Custom Request and Response

    Request and Response are handler preprocessing and postprocessing classes for receiving user requests and returning results. They can be simply understood as pre and post logic for the handler.

    Starwhale provides built-in Request implementations for Dataset types and Json Response. Users can also customize the logic as follows:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req

    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/0.5.10/reference/sdk/model/index.html b/0.5.10/reference/sdk/model/index.html index a1e20a316..58b7a171e 100644 --- a/0.5.10/reference/sdk/model/index.html +++ b/0.5.10/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Model SDK

    model.build

    model.build is a function that can build the Starwhale model, equivalent to the swcli model build command.

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • modules: (List[str|object], optional)
      • The search modules supports object(function, class or module) or str(example: "to.path.module", "to.path.module:object").
      • If the argument is not specified, the search modules are the imported modules.
    • name: (str, optional)
      • Starwhale Model name.
      • The default is the current work dir (cwd) name.
    • workdir: (str, Pathlib.Path, optional)
      • The path of the rootdir. The default workdir is the current working dir.
      • All files in the workdir will be packaged. If you want to ignore some files, you can add .swignore file in the workdir.
    • project_uri: (str, optional)
      • The project uri of the Starwhale Model.
      • If the argument is not specified, the project_uri is the config value of swcli project select command.
    • desc: (str, optional)
      • The description of the Starwhale Model.
    • remote_project_uri: (str, optional)
      • Project URI of another instance. After the Starwhale model is built, it will be automatically copied to the remote instance.
    • add_all: (bool, optional)
      • Add all files in the working directory to the model package (when disabled, Python cache files and virtual environment files are excluded). The .swignore file still takes effect.
      • The default value is False.
    • tags: (List[str], optional)
      • The tags for the model version.
      • latest and ^v\d+$ tags are reserved tags.

    Examples

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/0.5.10/reference/sdk/other/index.html b/0.5.10/reference/sdk/other/index.html index 84c774943..892c39b6a 100644 --- a/0.5.10/reference/sdk/other/index.html +++ b/0.5.10/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Other SDK

    __version__

    Version of Starwhale Python SDK and swcli, string constant.

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    Initialize Starwhale logger and traceback. The default value is 0.

    • 0: show only errors, traceback only shows 1 frame.
    • 1: show errors + warnings, traceback shows 5 frames.
    • 2: show errors + warnings + info, traceback shows 10 frames.
    • 3: show errors + warnings + info + debug, traceback shows 100 frames.
    • >=4: show errors + warnings + info + debug + trace, traceback shows 1000 frames.
    def init_logger(verbose: int = 0) -> None:

    login

    Log in to a server/cloud instance. It is equivalent to running the swcli instance login command. Logging in to a Standalone instance is meaningless.

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    Parameters

    • instance: (str, required)
      • The http url of the server/cloud instance.
    • alias: (str, optional)
      • An alias for the instance to simplify the instance part of the Starwhale URI.
      • If not specified, the hostname part of the instance http url will be used.
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • You can only choose one of username + password or token to login to the instance.

    Examples

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    Log out of a server/cloud instance. It is equivalent to running the swcli instance logout command. Logging out of a Standalone instance is meaningless.

    def logout(instance: str) -> None:

    Examples

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/0.5.10/reference/sdk/overview/index.html b/0.5.10/reference/sdk/overview/index.html index 76d6beecf..f90a85991 100644 --- a/0.5.10/reference/sdk/overview/index.html +++ b/0.5.10/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Python SDK Overview

    Starwhale provides a series of Python SDKs to help manage datasets, models, evaluations etc. Using the Starwhale Python SDK can make it easier to complete your ML/DL development tasks.

    Classes

    • PipelineHandler: Provides default model evaluation process definition, requires implementation of predict and evaluate methods.
    • Context: Passes context information during model evaluation, including Project, Task ID etc.
    • class Dataset: Starwhale Dataset class.
    • class starwhale.api.service.Service: The base class of online evaluation.

    Functions

    • @multi_classification: Decorator for multi-class problems to simplify evaluate result calculation and storage for better evaluation presentation.
    • @handler: Decorator to define a running entity with resource attributes (mem/cpu/gpu). You can control replica count. Handlers can form DAGs through dependencies to control execution flow.
    • @evaluation.predict: Decorator to define inference process in model evaluation, similar to map phase in MapReduce.
    • @evaluation.evaluate: Decorator to define evaluation process in model evaluation, similar to reduce phase in MapReduce.
    • evaluation.log: Log evaluation metrics to the specific tables.
    • evaluation.log_summary: Log certain metrics to the summary table.
    • evaluation.iter: Iterate and read data from the certain tables.
    • model.build: Build Starwhale model.
    • @fine_tune: Decorator to define model fine-tuning process.
    • init_logger: Set log level, implement 5-level logging.
    • dataset: Get starwhale.Dataset object, by creating new datasets or loading existing datasets.
    • @starwhale.api.service.api: Decorator to provide a simple Web Handler input definition based on Gradio.
    • login: Log in to the server/cloud instance.
    • logout: Log out of the server/cloud instance.

    Data Types

    • COCOObjectAnnotation: Provides COCO format definitions.
    • BoundingBox: Bounding box type, currently in LTWH format - left_x, top_y, width and height.
    • ClassLabel: Describes the number and types of labels.
    • Image: Image type.
    • GrayscaleImage: Grayscale image type, e.g. MNIST digit images, a special case of Image type.
    • Audio: Audio type.
    • Video: Video type.
    • Text: Text type, default utf-8 encoding, for storing large texts.
    • Binary: Binary type, stored in bytes, for storing large binary content.
    • Line: Line type.
    • Point: Point type.
    • Polygon: Polygon type.
    • Link: Link type, for creating remote-link data.
    • S3LinkAuth: When data is stored in S3-based object storage, this type describes auth and key info.
    • MIMEType: Describes multimedia types supported by Starwhale, used in mime_type attribute of Image, Video etc for better Dataset Viewer.
    • LinkType: Describes remote link types supported by Starwhale, currently LocalFS and S3.

    Other

    • __version__: Version of Starwhale Python SDK and swcli, string constant.

    Further reading

    - + \ No newline at end of file diff --git a/0.5.10/reference/sdk/type/index.html b/0.5.10/reference/sdk/type/index.html index 2811abe6b..a45814d64 100644 --- a/0.5.10/reference/sdk/type/index.html +++ b/0.5.10/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Data Types

    COCOObjectAnnotation

    It provides definitions following the COCO format.

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    ParameterDescription
    idObject id, usually a globally incrementing id
    image_idImage id, usually id of the image
    category_idCategory id, usually id of the class in object detection
    segmentationObject contour representation, Polygon (polygon vertices) or RLE format
    areaObject area
    bboxRepresents bounding box, can be BoundingBox type or list of floats
    iscrowd0 indicates a single object, 1 indicates two unseparated objects

    Examples

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    GrayscaleImage provides a grayscale image type. It is a special case of the Image type, for example the digit images in MNIST.

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    ParameterDescription
    fpImage path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    shapeImage width and height, default channel is 1
    as_maskWhether used as a mask image
    mask_uriURI of the original image for the mask

    Examples

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage Functions

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    BoundingBox provides a bounding box type, currently in LTWH format:

    • left_x: x-coordinate of left edge
    • top_y: y-coordinate of top edge
    • width: width of bounding box
    • height: height of bounding box

    So it represents the bounding box using the coordinates of its left, top, width and height. This is a common format for specifying bounding boxes in computer vision tasks.

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    ParameterDescription
    xx-coordinate of left edge (left_x)
    yy-coordinate of top edge (top_y)
    widthWidth of bounding box
    heightHeight of bounding box

    ClassLabel

    Describe labels.

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    Image Type.

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    ParameterDescription
    fpImage path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    shapeImage width, height and channels
    mime_typeMIMEType supported types
    as_maskWhether used as a mask image
    mask_uriURI of the original image for the mask

    The main difference from GrayscaleImage is that Image supports multi-channel RGB images by specifying shape as (W, H, C).

    Examples

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image Functions

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    Video type.

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    ParameterDescription
    fpVideo path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    mime_typeMIMEType supported types

    Examples

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    Audio type.

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    ParameterDescription
    fpAudio path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    mime_typeMIMEType supported types

    Examples

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio Functions

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    Text type, the default encoding is utf-8.

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    ParameterDescription
    contentThe text content
    encodingEncoding format of the text

    Examples

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text Functions

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    Binary provides a binary data type, stored as bytes.

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    ParameterDescription
    fpPath, IO object, or file content bytes
    mime_typeMIMEType supported types

    Binary Functions

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link

    Link provides a link type to create remote-link datasets in Starwhale.

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    ParameterDescription
    uriURI of the original data, currently supports localFS and S3 protocols
    authLink auth information
    offsetData offset relative to file pointed by uri
    sizeData size
    data_typeActual data type pointed by the link, currently supports Binary, Image, Text, Audio and Video

    Link.astype

    astype() -> Dict[str, t.Any]

    S3LinkAuth

    S3LinkAuth provides authentication and key information when data is stored on S3 protocol based object storage.

    S3LinkAuth(
    name: str = "",
    access_key: str = "",
    secret: str = "",
    endpoint: str = "",
    region: str = "local",
    )
    ParameterDescription
    nameName of the auth
    access_keyAccess key for S3 connection
    secretSecret for S3 connection
    endpointEndpoint URL for S3 connection
    regionS3 region where bucket is located, default is local.

    Examples

    import struct
    import typing as t
    from pathlib import Path

    from starwhale import (
    Link,
    S3LinkAuth,
    GrayscaleImage,
    UserRawBuildExecutor,
    )
    class LinkRawDatasetProcessExecutor(UserRawBuildExecutor):
    _auth = S3LinkAuth(name="mnist", access_key="minioadmin", secret="minioadmin")
    _endpoint = "10.131.0.1:9000"
    _bucket = "users"

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "t10k-labels-idx1-ubyte").open("rb") as label_file:
    _, label_number = struct.unpack(">II", label_file.read(8))

    offset = 16
    image_size = 28 * 28

    uri = f"s3://{self._endpoint}/{self._bucket}/dataset/mnist/t10k-images-idx3-ubyte"
    for i in range(label_number):
    _data = Link(
    f"{uri}",
    self._auth,
    offset=offset,
    size=image_size,
    data_type=GrayscaleImage(display_name=f"{i}", shape=(28, 28, 1)),
    )
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield _data, {"label": _label}
    offset += image_size

    MIMEType

    MIMEType describes the multimedia types supported by Starwhale, implemented using Python Enum. It is used in the mime_type attribute of Image, Video etc to enable better Dataset Viewer support.

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    LinkType

    LinkType describes the remote link types supported by Starwhale, also implemented using Python Enum. Currently supports LocalFS and S3 types.

    class LinkType(Enum):
    LocalFS = "local_fs"
    S3 = "s3"
    UNDEFINED = "undefined"

    Line

    from starwhale import ds, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    from starwhale import ds, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    from starwhale import ds, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/dataset/index.html b/0.5.10/reference/swcli/dataset/index.html index ddf9f827d..988314db1 100644 --- a/0.5.10/reference/swcli/dataset/index.html +++ b/0.5.10/reference/swcli/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    swcli dataset

    Overview

    swcli [GLOBAL OPTIONS] dataset [OPTIONS] <SUBCOMMAND> [ARGS]...

    The dataset command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • head
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • summary
    • tag

    swcli dataset build

    swcli [GLOBAL OPTIONS] dataset build [OPTIONS]

    Build a Starwhale Dataset. This command only supports building standalone datasets.

    Options

    • Data sources options:
    OptionRequiredTypeDefaultsDescription
    -if or --image or --image-folderNStringBuild dataset from image folder, the folder should contain the image files.
    -af or --audio or --audio-folderNStringBuild dataset from audio folder, the folder should contain the audio files.
    -vf or --video or --video-folderNStringBuild dataset from video folder, the folder should contain the video files.
    -h or --handler or --python-handlerNStringBuild dataset from python executor handler, the handler format is [module path]:[class or func name].
    -f or --yaml or --dataset-yamlNdataset.yaml in cwdBuild dataset from dataset.yaml file. Default uses dataset.yaml in the work directory(cwd).
    -jf or --json-fileNStringBuild dataset from json file, the json file option is a json file path or a http downloaded url.The json content structure should be a list[dict] or tuple[dict].
    -hf or --huggingfaceNStringBuild dataset from huggingface dataset, the huggingface option is a huggingface repo name.

    Data source options are mutually exclusive, only one option is accepted. If not set, the swcli dataset build command will use dataset yaml mode to build the dataset with the dataset.yaml in the cwd.

    • Other options:
    OptionRequiredScopeTypeDefaultsDescription
    -pt or --patchone of --patch and --overwriteGlobalBooleanTruePatch mode, only update the changed rows and columns for the existed dataset.
    -ow or --overwriteone of --patch and --overwriteGlobalBooleanFalseOverwrite mode, update records and delete extraneous rows from the existed dataset.
    -n or --nameNGlobalStringDataset name
    -p or --projectNGlobalStringDefault projectProject URI, the default is the current selected project. The dataset will store in the specified project.
    -d or --descNGlobalStringDataset description
    -as or --alignment-sizeNGlobalString128Bswds-bin format dataset: alignment size
    -vs or --volume-sizeNGlobalString64MBswds-bin format dataset: volume size
    -r or --runtimeNGlobalStringRuntime URI
    -w or --workdirNPython Handler ModeStringcwdwork dir to search handler.
    --auto-label/--no-auto-labelNImage/Video/Audio Folder ModeBooleanTrueWhether to auto label by the sub-folder name.
    --field-selectorNJSON File ModeStringThe field from which you would like to extract dataset array items. The field is split by the dot(.) symbol.
    --subsetNHuggingface ModeStringHuggingface dataset subset name. If the huggingface dataset has multiple subsets, you must specify the subset name.
    --splitNHuggingface ModeStringHuggingface dataset split name. If the split name is not specified, the all splits dataset will be built.
    --revisionNHuggingface ModeStringmainVersion of the dataset script to load. Defaults to 'main'. The option value accepts tag name, or branch name, or commit hash.
    --cache/--no-cacheNHuggingface ModeBooleanTrueWhether to use huggingface dataset cache(download + local hf dataset).
    -t or --tagNGlobalStringDataset tags, the option can be used multiple times.

    Examples for dataset building

    #- from dataset.yaml
    swcli dataset build # build dataset from dataset.yaml in the current work directory(pwd)
    swcli dataset build --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, all the involved files are related to the dataset.yaml file.
    swcli dataset build --overwrite --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, and overwrite the existed dataset.
    swcli dataset build --tag tag1 --tag tag2

    #- from handler
    swcli dataset build --handler mnist.dataset:iter_mnist_item # build dataset from mnist.dataset:iter_mnist_item handler, the workdir is the current work directory(pwd).
    # build dataset from mnist.dataset:LinkRawDatasetProcessExecutor handler, the workdir is example/mnist
    swcli dataset build --handler mnist.dataset:LinkRawDatasetProcessExecutor --workdir example/mnist

    #- from image folder
    swcli dataset build --image-folder /path/to/image/folder # build dataset from /path/to/image/folder, search all image type files.

    #- from audio folder
    swcli dataset build --audio-folder /path/to/audio/folder # build dataset from /path/to/audio/folder, search all audio type files.

    #- from video folder
    swcli dataset build --video-folder /path/to/video/folder # build dataset from /path/to/video/folder, search all video type files.

    #- from json file
    swcli dataset build --json-file /path/to/example.json
    swcli dataset build --json-file http://example.com/example.json
    swcli dataset build --json-file /path/to/example.json --field-selector a.b.c # extract the json_content["a"]["b"]["c"] field from the json file.
    swcli dataset build --name qald9 --json-file https://raw.githubusercontent.com/ag-sc/QALD/master/9/data/qald-9-test-multilingual.json --field-selector questions

    #- from huggingface dataset
    swcli dataset build --huggingface mnist
    swcli dataset build -hf mnist --no-cache
    swcli dataset build -hf cais/mmlu --subset anatomy --split auxiliary_train --revision 7456cfb

    swcli dataset copy

    swcli [GLOBAL OPTIONS] dataset copy [OPTIONS] <SRC> <DEST>

    dataset copy copies from SRC to DEST.

    SRC and DEST are both dataset URIs.

    When copying Starwhale Dataset, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -p or --patchone of --patch and --overwriteBooleanTruePatch mode, only update the changed rows and columns for the remote dataset.
    -o or --overwriteone of --patch and --overwriteBooleanFalseOverwrite mode, update records and delete extraneous rows from the remote dataset.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for dataset copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a new dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp --patch cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with a dataset name 'mnist-local'
    swcli dataset cp --overwrite cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with a new dataset name 'mnist-cloud'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli dataset cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp local/project/myproject/dataset/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli dataset cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1 --force

    swcli dataset diff

    swcli [GLOBAL OPTIONS] dataset diff [OPTIONS] <DATASET VERSION> <DATASET VERSION>

    dataset diff compares the difference between two versions of the same dataset.

    DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.
    swcli [GLOBAL OPTIONS] dataset head [OPTIONS] <DATASET VERSION>

    Print the first n rows of the dataset. DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    -n or --rowsNInt5Print the first NUM rows of the dataset.
    -srd or --show-raw-dataNBooleanFalseFetch raw data content from objectstore.
    -st or --show-typesNBooleanFalseShow data types.

    Examples for dataset head

    #- print the first 5 rows of the mnist dataset
    swcli dataset head -n 5 mnist

    #- print the first 10 rows of the mnist(v0 version) dataset and show raw data
    swcli dataset head -n 10 mnist/v0 --show-raw-data

    #- print the data types of the mnist dataset
    swcli dataset head mnist --show-types

    #- print the remote cloud dataset's first 5 rows
    swcli dataset head cloud://cloud-cn/project/test/dataset/mnist -n 5

    #- print the first 5 rows in the json format
    swcli -o json dataset head -n 5 mnist

    swcli dataset history

    swcli [GLOBAL OPTIONS] dataset history [OPTIONS] <DATASET>

    dataset history outputs all history versions of the specified Starwhale Dataset.

    DATASET is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli dataset info

    swcli [GLOBAL OPTIONS] dataset info [OPTIONS] <DATASET>

    dataset info outputs detailed information about the specified Starwhale Dataset version.

    DATASET is a dataset URI.

    swcli dataset list

    swcli [GLOBAL OPTIONS] dataset list [OPTIONS]

    dataset list shows all Starwhale Datasets.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include datasets that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Datasets that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of datasets--filter name=mnist
    ownerKey-ValueThe dataset owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli dataset recover

    swcli [GLOBAL OPTIONS] dataset recover [OPTIONS] <DATASET>

    dataset recover recovers previously removed Starwhale Datasets or versions.

    DATASET is a dataset URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Datasets or versions can not be recovered, as well as those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Dataset or version with the same name or version id.

    swcli dataset remove

    swcli [GLOBAL OPTIONS] dataset remove [OPTIONS] <DATASET>

    dataset remove removes the specified Starwhale Dataset or version.

    DATASET is a dataset URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Datasets or versions can be recovered by swcli dataset recover before garbage collection. Use the --force option to persistently remove a Starwhale Dataset or version.

    Removed Starwhale Datasets or versions can be listed by swcli dataset list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Dataset or version. It can not be recovered.

    swcli dataset summary

    swcli [GLOBAL OPTIONS] dataset summary <DATASET>

    Show dataset summary. DATASET is a dataset URI.

    swcli dataset tag

    swcli [GLOBAL OPTIONS] dataset tag [OPTIONS] <DATASET> [TAGS]...

    dataset tag attaches a tag to a specified Starwhale Dataset version. At the same time, tag command also supports list and remove tags. The tag can be used in a dataset URI instead of the version id.

    DATASET is a dataset URI.

    Each dataset version can have any number of tags, but duplicated tag names are not allowed in the same dataset.

    dataset tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another dataset version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for dataset tag

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist -t t1 -t t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest -t t1 --force-add
    swcli dataset tag mnist -t t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r -t t1 -t t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove -t t1
    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/index.html b/0.5.10/reference/swcli/index.html index 7527957a0..02e9a744a 100644 --- a/0.5.10/reference/swcli/index.html +++ b/0.5.10/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Overview

    Usage

    swcli [OPTIONS] <COMMAND> [ARGS]...
    note

    sw and starwhale are aliases for swcli.

    Global Options

    OptionDescription
    --versionShow the Starwhale Client version
    -v or --verboseShow verbose log, support multi counts for -v args. More -v args, more logs.
    --helpShow the help message.
    caution

    Global options must be put immediately after swcli, and before any command.

    Commands

    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/instance/index.html b/0.5.10/reference/swcli/instance/index.html index 341bc94c5..d86b78252 100644 --- a/0.5.10/reference/swcli/instance/index.html +++ b/0.5.10/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    swcli instance

    Overview

    swcli [GLOBAL OPTIONS] instance [OPTIONS] <SUBCOMMAND> [ARGS]

    The instance command includes the following subcommands:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [GLOBAL OPTIONS] instance info [OPTIONS] <INSTANCE>

    instance info outputs detailed information about the specified Starwhale Instance.

    INSTANCE is an instance URI.

    swcli instance list

    swcli [GLOBAL OPTIONS] instance list [OPTIONS]

    instance list shows all Starwhale Instances.

    swcli instance login

    swcli [GLOBAL OPTIONS] instance login [OPTIONS] <INSTANCE>

    instance login connects to a Server/Cloud instance and makes the specified instance default.

    INSTANCE is an instance URI.

    OptionRequiredTypeDefaultsDescription
    --usernameNStringThe login username.
    --passwordNStringThe login password.
    --tokenNStringThe login token.
    --aliasYStringThe alias of the instance. You can use it anywhere that requires an instance URI.

    --username and --password can not be used together with --token.

    swcli instance logout

    swcli [GLOBAL OPTIONS] instance logout [INSTANCE]

    instance logout disconnects from the Server/Cloud instance, and clears information stored in the local storage.

    INSTANCE is an instance URI. If it is omitted, the default instance is used instead.

    swcli instance use

    swcli [GLOBAL OPTIONS] instance use <INSTANCE>

    instance use makes the specified instance default.

    INSTANCE is an instance URI.

    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/job/index.html b/0.5.10/reference/swcli/job/index.html index 0e984a923..d25820269 100644 --- a/0.5.10/reference/swcli/job/index.html +++ b/0.5.10/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    swcli job

    Overview

    swcli [GLOBAL OPTIONS] job [OPTIONS] <SUBCOMMAND> [ARGS]...

    The job command includes the following subcommands:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [GLOBAL OPTIONS] job cancel [OPTIONS] <JOB>

    job cancel stops the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job info

    swcli [GLOBAL OPTIONS] job info [OPTIONS] <JOB>

    job info outputs detailed information about the specified Starwhale Job.

    JOB is a job URI.

    OptionRequiredTypeDefaultsDescription
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli job list

    swcli [GLOBAL OPTIONS] job list [OPTIONS]

    job list shows all Starwhale Jobs.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --show-removed or -srNBooleanFalseIf true, include jobs that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli job pause

    swcli [GLOBAL OPTIONS] job pause [OPTIONS] <JOB>

    job pause pauses the specified job. Paused jobs can be resumed by job resume. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    From Starwhale's perspective, pause is almost the same as cancel, except that the job reuses the old Job id when resumed. It is the job developer's responsibility to save all data periodically and load them when resumed. The job id is usually used as a key of the checkpoint.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job resume

    swcli [GLOBAL OPTIONS] job resume [OPTIONS] <JOB>

    job resume resumes the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/model/index.html b/0.5.10/reference/swcli/model/index.html index 09cde98d9..7b5f3b083 100644 --- a/0.5.10/reference/swcli/model/index.html +++ b/0.5.10/reference/swcli/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    swcli model

    Overview

    swcli [GLOBAL OPTIONS] model [OPTIONS] <SUBCOMMAND> [ARGS]...

    The model command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [GLOBAL OPTIONS] model build [OPTIONS] <WORKDIR>

    model build will put the whole WORKDIR into the model, except files that match patterns defined in .swignore.

    model build will import modules specified by --module to generate the required configurations to run the model. If your module depends on third-party libraries, we strongly recommend you use the --runtime option; otherwise, you need to ensure that the python environment used by swcli has these libraries installed.

    OptionRequiredTypeDefaultsDescription
    --project or -pNStringthe default projectthe project URI
    --model-yaml or -fNString${workdir}/model.yamlmodel yaml path, default use ${workdir}/model.yaml file. model.yaml is optional for model build.
    --module or -mNStringPython modules to be imported during the build process. Starwhale will export model handlers from these modules to the model package. This option supports set multiple times.
    --runtime or -rNStringthe URI of the Starwhale Runtime to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --name or -nNStringmodel package name
    --desc or -dNStringmodel package description
    --package-runtime--no-package-runtimeNBooleanTrueWhen using the --runtime parameter, by default, the corresponding Starwhale runtime will become the built-in runtime for the Starwhale model. This feature can be disabled with the --no-package-runtime parameter.
    --add-allNBooleanFalseAdd all files in the working directory to the model package(excludes python cache files and virtual environment files when disabled). The .swignore file still takes effect.
    -t or --tagNGlobalString

    Examples for model build

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [GLOBAL OPTIONS] model copy [OPTIONS] <SRC> <DEST>

    model copy copies from SRC to DEST for Starwhale Model sharing.

    SRC and DEST are both model URIs.

    When copying Starwhale Model, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for model copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [GLOBAL OPTIONS] model diff [OPTIONS] <MODEL VERSION> <MODEL VERSION>

    model diff compares the difference between two versions of the same model.

    MODEL VERSION is a model URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.

    swcli model extract

    swcli [GLOBAL OPTIONS] model extract [OPTIONS] <MODEL> <TARGET_DIR>

    The model extract command can extract a Starwhale model to a specified directory for further customization.

    MODEL is a model URI.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseIf this option is used, it will forcibly overwrite existing extracted model files in the target directory.

    Examples for model extract

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [GLOBAL OPTIONS] model history [OPTIONS] <MODEL>

    model history outputs all history versions of the specified Starwhale Model.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli model info

    swcli [GLOBAL OPTIONS] model info [OPTIONS] <MODEL>

    model info outputs detailed information about the specified Starwhale Model version.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for model info

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [GLOBAL OPTIONS] model list [OPTIONS]

    model list shows all Starwhale Models.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removedNBooleanFalseIf true, include models that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Models that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of models--filter name=mnist
    ownerKey-ValueThe model owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli model recover

    swcli [GLOBAL OPTIONS] model recover [OPTIONS] <MODEL>

    model recover recovers previously removed Starwhale Models or versions.

    MODEL is a model URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Models or versions can not be recovered, as well as those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Model or version with the same name or version id.

    swcli model remove

    swcli [GLOBAL OPTIONS] model remove [OPTIONS] <MODEL>

    model remove removes the specified Starwhale Model or version.

    MODEL is a model URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Models or versions can be recovered by swcli model recover before garbage collection. Use the --force option to persistently remove a Starwhale Model or version.

    Removed Starwhale Models or versions can be listed by swcli model list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Model or version. It can not be recovered.

    swcli model run

    swcli [GLOBAL OPTIONS] model run [OPTIONS]

    model run executes a model handler. Model run supports two modes to run: model URI and local development. Model URI mode needs a pre-built Starwhale Model Package. Local development mode only needs the model src dir.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringFor local development mode, the path of model src dir.
    --uri or -uNStringFor model URI mode, the string of model uri.
    --handler or -hNStringRunnable handler index or name, default is None, will use the first handler
    --module or -mNStringThe name of the Python module to import. This parameter can be set multiple times.
    --runtime or -rNStringthe Starwhale Runtime URI to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model run.
    --run-project or -pNStringDefault projectProject URI, indicates the model run results will be stored in the corresponding project.
    --dataset or -dNStringDataset URI, the Starwhale dataset required for model running. This parameter can be set multiple times.
    --in-containerNBooleanFalseUse docker container to run the model. This option is only available for standalone instances. For server and cloud instances, a docker image is always used. If the runtime is a docker image, this option is always implied.
    --forbid-snapshot or -fsNBooleanFalseIn model URI mode, each model run uses a new snapshot directory. Setting this parameter will directly use the model's workdir as the run directory. In local dev mode, this parameter does not take effect, each run is in the --workdir specified directory.

    Examples for model run

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp

    swcli model serve
    swcli [GLOBAL OPTIONS] model serve [OPTIONS]

    The model serve command can run the model as a web server, and provide a simple web interaction interface.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringIn local dev mode, specify the directory of the model code.
    --uri or -uNStringIn model URI mode, specify the model URI.
    --runtime or -rNStringThe URI of the Starwhale runtime to use when running this command. If specified, the command will run in the isolated Python environment defined in the Starwhale runtime. Otherwise it will run directly in the current Python environment of swcli.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model serve.
    --module or -mNStringName of the Python module to import. This parameter can be set multiple times.
    --hostNString127.0.0.1The address for the service to listen on.
    --portNInteger8080The port for the service to listen on.

    Examples for model serve

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [GLOBAL OPTIONS] model tag [OPTIONS] <MODEL> [TAGS]...

    model tag attaches a tag to a specified Starwhale Model version. At the same time, tag command also supports list and remove tags. The tag can be used in a model URI instead of the version id.

    MODEL is a model URI.

    Each model version can have any number of tags, but duplicated tag names are not allowed in the same model.

    model tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another model version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for model tag

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist -t t1 -t t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest -t t1 --force-add
    swcli model tag mnist -t t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r -t t1 -t t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove -t t1
    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/project/index.html b/0.5.10/reference/swcli/project/index.html index 605ab1f7e..5d790c4e2 100644 --- a/0.5.10/reference/swcli/project/index.html +++ b/0.5.10/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    swcli project

    Overview

    swcli [GLOBAL OPTIONS] project [OPTIONS] <SUBCOMMAND> [ARGS]...

    The project command includes the following subcommands:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [GLOBAL OPTIONS] project create <PROJECT>

    project create creates a new project.

    PROJECT is a project URI.

    swcli project info

    swcli [GLOBAL OPTIONS] project info [OPTIONS] <PROJECT>

    project info outputs detailed information about the specified Starwhale Project.

    PROJECT is a project URI.

    swcli project list

    swcli [GLOBAL OPTIONS] project list [OPTIONS]

    project list shows all Starwhale Projects.

    OptionRequiredTypeDefaultsDescription
    --instanceNStringThe URI of the instance to list. If this option is omitted, use the default instance.
    --show-removedNBooleanFalseIf true, include projects that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli project recover

    swcli [GLOBAL OPTIONS] project recover [OPTIONS] <PROJECT>

    project recover recovers previously removed Starwhale Projects.

    PROJECT is a project URI.

    Garbage-collected Starwhale Projects can not be recovered, as well as those removed with the --force option.

    swcli project remove

    swcli [GLOBAL OPTIONS] project remove [OPTIONS] <PROJECT>

    project remove removes the specified Starwhale Project.

    PROJECT is a project URI.

    Removed Starwhale Projects can be recovered by swcli project recover before garbage collection. Use the --force option to persistently remove a Starwhale Project.

    Removed Starwhale Project can be listed by swcli project list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Project. It can not be recovered.

    swcli project use

    swcli [GLOBAL OPTIONS] project use <PROJECT>

    project use makes the specified project the default one. You must log in first to use a project on a Server/Cloud instance.

    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/runtime/index.html b/0.5.10/reference/swcli/runtime/index.html index 13443eeb6..244526421 100644 --- a/0.5.10/reference/swcli/runtime/index.html +++ b/0.5.10/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    swcli runtime

    Overview

    swcli [GLOBAL OPTIONS] runtime [OPTIONS] <SUBCOMMAND> [ARGS]...

    The runtime command includes the following subcommands:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [GLOBAL OPTIONS] runtime activate [OPTIONS] <RUNTIME>

    Like source venv/bin/activate or conda activate xxx, runtime activate sets up a new Python environment according to the settings of the specified runtime. When the current shell is closed or switched to another one, you need to reactivate the runtime. RUNTIME is a Runtime URI.

    If you want to quit the activated runtime environment, please run venv deactivate in the venv environment or conda deactivate in the conda environment.

    The runtime activate command will build a Python isolated environment and download relevant Python packages according to the definition of the Starwhale runtime when activating the environment for the first time. This process may spend a lot of time.

    swcli runtime build

    swcli [GLOBAL OPTIONS] runtime build [OPTIONS]

    The runtime build command can build a shareable and reproducible runtime environment suitable for ML/DL from various environments or runtime.yaml file.

    Parameters

    • Parameters related to runtime building methods:
    OptionRequiredTypeDefaultsDescription
    -c or --condaNStringFind the corresponding conda environment by conda env name, export Python dependencies to generate Starwhale runtime.
    -cp or --conda-prefixNStringFind the corresponding conda environment by conda env prefix path, export Python dependencies to generate Starwhale runtime.
    -v or --venvNStringFind the corresponding venv environment by venv directory address, export Python dependencies to generate Starwhale runtime.
    -s or --shellNStringExport Python dependencies according to current shell environment to generate Starwhale runtime.
    -y or --yamlNruntime.yaml in cwd directoryBuild Starwhale runtime according to user-defined runtime.yaml.
    -d or --dockerNStringUse the docker image as Starwhale runtime.

    The parameters for runtime building methods are mutually exclusive, only one method can be specified. If not specified, it will use --yaml method to read runtime.yaml in cwd directory to build Starwhale runtime.

    • Other parameters:
    OptionRequiredScopeTypeDefaultsDescription
    --project or -pNGlobalStringDefault projectProject URI
    -del or --disable-env-lockNruntime.yaml modeBooleanFalseWhether to install dependencies in runtime.yaml and lock the version information of related dependencies. The dependencies will be locked by default.
    -nc or --no-cacheNruntime.yaml modeBooleanFalseWhether to delete the isolated environment and install related dependencies from scratch. By default dependencies will be installed in the existing isolated environment.
    --cudaNconda/venv/shell modeChoice[11.3/11.4/11.5/11.6/11.7/]CUDA version, CUDA will not be used by default.
    --cudnnNconda/venv/shell modeChoice[8/]cuDNN version, cuDNN will not be used by default.
    --archNconda/venv/shell modeChoice[amd64/arm64/noarch]noarchArchitecture
    -epo or --emit-pip-optionsNGlobalBooleanFalseWhether to export ~/.pip/pip.conf, exported by default.
    -ecc or --emit-condarcNGlobalBooleanFalseWhether to export ~/.condarc, exported by default.
    -t or --tagNGlobalStringRuntime tags, the option can be used multiple times.

    Examples for Starwhale Runtime building

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [GLOBAL OPTIONS] runtime copy [OPTIONS] <SRC> <DEST>

    runtime copy copies from SRC to DEST. SRC and DEST are both Runtime URIs.

    When copying a Starwhale Runtime, all custom user-defined tags will be copied by default. You can use the --ignore-tag parameter to ignore certain tags. In addition, the latest and ^v\d+$ tags are built-in Starwhale system tags that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for Starwhale Runtime copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [GLOBAL OPTIONS] runtime dockerize [OPTIONS] <RUNTIME>

    runtime dockerize generates a docker image based on the specified runtime. Starwhale uses docker buildx to create the image. Docker 19.03 or later is required to run this command.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --tag or -tNStringThe tag of the docker image. This option can be repeated multiple times.
    --pushNBooleanFalseIf true, push the image to the docker registry
    --platformNStringamd64The target platform, can be either amd64 or arm64. This option can be repeated multiple times to create a multi-platform image.


    swcli runtime extract

    swcli [Global Options] runtime extract [Options] <RUNTIME>

    Starwhale runtimes are distributed as compressed packages. The runtime extract command can be used to extract the runtime package for further customization and modification.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseWhether to delete and re-extract if there is already an extracted Starwhale runtime in the target directory.
    --target-dirNStringCustom extraction directory. If not specified, it will be extracted to the default Starwhale runtime workdir. The command log will show the directory location.

    swcli runtime history

    swcli [GLOBAL OPTIONS] runtime history [OPTIONS] <RUNTIME>

    runtime history outputs all history versions of the specified Starwhale Runtime.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli runtime info

    swcli [GLOBAL OPTIONS] runtime info [OPTIONS] <RUNTIME>

    runtime info outputs detailed information about a specified Starwhale Runtime version.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/runtime_yaml/manifest/lock/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for Starwhale Runtime info

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [GLOBAL OPTIONS] runtime list [OPTIONS]

    runtime list shows all Starwhale Runtimes.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include runtimes that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Runtimes that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of runtimes--filter name=pytorch
    ownerKey-ValueThe runtime owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli runtime recover

    swcli [GLOBAL OPTIONS] runtime recover [OPTIONS] <RUNTIME>

    runtime recover can recover previously removed Starwhale Runtimes or versions.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Runtimes or versions can not be recovered, nor can those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Runtime or version with the same name or version id.

    swcli runtime remove

    swcli [GLOBAL OPTIONS] runtime remove [OPTIONS] <RUNTIME>

    runtime remove removes the specified Starwhale Runtime or version.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Runtimes or versions can be recovered by swcli runtime recover before garbage collection. Use the --force option to persistently remove a Starwhale Runtime or version.

    Removed Starwhale Runtimes or versions can be listed by swcli runtime list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Runtime or version. It can not be recovered.

    swcli runtime tag

    swcli [GLOBAL OPTIONS] runtime tag [OPTIONS] <RUNTIME> [TAGS]...

    runtime tag attaches a tag to a specified Starwhale Runtime version. At the same time, tag command also supports list and remove tags. The tag can be used in a runtime URI instead of the version id.

    RUNTIME is a Runtime URI.

    Each runtime version can have any number of tags, but duplicated tag names are not allowed in the same runtime.

    runtime tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseRemove the tag if true
    --quiet or -qNBooleanFalseIgnore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding tags on server/cloud instances, if the tag is already used by another runtime version, an error will be prompted. In this case, you can force an update using the --force-add option.

    Examples for runtime tag

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist -t t1 -t t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest -t t1 --force-add
    swcli runtime tag mnist -t t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r -t t1 -t t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove -t t1
    - + \ No newline at end of file diff --git a/0.5.10/reference/swcli/utilities/index.html b/0.5.10/reference/swcli/utilities/index.html index 18f0cb036..c1b2d50d8 100644 --- a/0.5.10/reference/swcli/utilities/index.html +++ b/0.5.10/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Utility Commands

    swcli gc

    swcli [GLOBAL OPTIONS] gc [OPTIONS]

    gc clears removed projects, models, datasets, and runtimes according to the internal garbage collection policy.

    OptionRequiredTypeDefaultsDescription
    --dry-runNBooleanFalseIf true, outputs objects to be removed instead of clearing them.
    --yesNBooleanFalseBypass confirmation prompts.

    swcli check

    swcli [GLOBAL OPTIONS] check

    Check if the external dependencies of the swcli command meet the requirements. Currently mainly checks Docker and Conda.

    swcli completion install

    swcli [GLOBAL OPTIONS] completion install <SHELL_NAME>

    Install autocompletion for swcli commands. Currently supports bash, zsh and fish. If SHELL_NAME is not specified, it will try to automatically detect the current shell type.

    swcli config edit

    swcli [GLOBAL OPTIONS] config edit

    Edit the Starwhale configuration file at ~/.config/starwhale/config.yaml.

    swcli ui

    swcli [GLOBAL OPTIONS] ui <INSTANCE>

    Open the web page for the corresponding instance.

    - + \ No newline at end of file diff --git a/0.5.10/runtime/index.html b/0.5.10/runtime/index.html index 08f83046d..9343c556f 100644 --- a/0.5.10/runtime/index.html +++ b/0.5.10/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Runtime

    Overview

    Starwhale Runtime aims to provide a reproducible and sharable running environment for python programs. You can easily share your working environment with your teammates or outsiders, and vice versa. Furthermore, you can run your programs on Starwhale Server or Starwhale Cloud without bothering with the dependencies.

    Starwhale works well with virtualenv, conda, and docker. If you are using one of them, it is straightforward to create a Starwhale Runtime based on your current environment.

    Multiple Starwhale Runtimes on your local machine can be switched freely by one command. You can work on different projects without messing up the environment. Starwhale Runtime consists of two parts: the base image and the dependencies.

    The base image

    The base is a docker image with Python, CUDA, and cuDNN installed. Starwhale provides various base images for you to choose from; see the following list:

    • Computer system architecture:
      • X86 (amd64)
      • Arm (aarch64)
    • Operating system:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7

    runtime.yaml

    runtime.yaml is the core configuration file of Starwhale Runtime.

    # The name of Starwhale Runtime
    name: demo
    # The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your base image
    docker:
    image: mycustom.com/docker/image:tag
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be installed with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/0.5.10/runtime/yaml/index.html b/0.5.10/runtime/yaml/index.html index 155da7f01..cab08ee1d 100644 --- a/0.5.10/runtime/yaml/index.html +++ b/0.5.10/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    The runtime.yaml Specification

    runtime.yaml is the configuration file that defines the properties of the Starwhale Runtime. runtime.yaml is required for the yaml mode of the swcli runtime build command.

    Examples

    The simplest example

    dependencies:
    - pip:
    - numpy
    name: simple-test

    Define a Starwhale Runtime that uses venv as the Python virtual environment for package isolation, and installs the numpy dependency.

    The llama2 example

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    The full definition example

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be installed with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/0.5.10/server/guides/server_admin/index.html b/0.5.10/server/guides/server_admin/index.html index d3d61fbaa..3dc29a3ff 100644 --- a/0.5.10/server/guides/server_admin/index.html +++ b/0.5.10/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    Skip to main content
    Version: 0.5.10

    Controller Admin Settings

    Superuser Password Reset

    In case you forget the superuser's password, you could use the SQL below to reset the password to abcd1234:

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    After that, you could login to the console and then change the password to what you really want.

    System Settings

    You could customize the system to make it easier to use by leveraging the system settings. Here is an example below:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    Image Registry

    Tasks dispatched by the server are based on docker images. Pulling these images could be slow if your network connection is poor. Starwhale Server supports custom image registries, including dockerSetting.registryForPush and dockerSetting.registryForPull.

    Resource Pool

    The resourcePoolSetting allows you to manage your cluster in a group manner. It is currently implemented by K8S nodeSelector, you could label your machines in K8S cluster and make them a resourcePool in Starwhale.

    Remote Storage

    The storageSetting allows you to manage the storages the server could access.

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload

    Every storageSetting item has a corresponding implementation of the StorageAccessService interface. Starwhale has four built-in implementations:

    • StorageAccessServiceAliyun matches type in (aliyun,oss)
    • StorageAccessServiceMinio matches type in (minio)
    • StorageAccessServiceS3 matches type in (s3)
    • StorageAccessServiceFile matches type in (fs, file)

    Each of the implementations has different requirements for tokens. endpoint is required when type is in (aliyun, minio), and region is required when type is s3 and endpoint is empty. The fs/file type requires tokens to contain the keys rootDir and serviceProvider. Please refer to the code for more details.

    - + \ No newline at end of file diff --git a/0.5.10/server/index.html b/0.5.10/server/index.html index 687a78860..96c379f81 100644 --- a/0.5.10/server/index.html +++ b/0.5.10/server/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.10/server/installation/docker/index.html b/0.5.10/server/installation/docker/index.html index e2658b2e1..91b48fb98 100644 --- a/0.5.10/server/installation/docker/index.html +++ b/0.5.10/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Install Starwhale Server with Docker

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • An S3-compatible object storage to save datasets, models, and others.

    Please make sure pods on the Kubernetes cluster can access the port exposed by the Starwhale Server installation.

    Prepare an env file for Docker

    Starwhale Server can be configured by environment variables.

    An env file template for Docker is here. You may create your own env file by modifying the template.

    Prepare a kubeconfig file

    The kubeconfig file is used for accessing the Kubernetes cluster. For more information about kubeconfig files, see the Official Kubernetes Documentation.

    If you have a local kubectl command-line tool installed, you can run kubectl config view to see your current configuration.

    Run the Docker image

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<path to your kubeconfig file>,destination=/root/.kube/config,readonly \
    --env-file <path to your env file> \
    ghcr.io/star-whale/server:0.5.6

    For users in the mainland of China, use docker image: docker-registry.starwhale.cn/star-whale/server.

    - + \ No newline at end of file diff --git a/0.5.10/server/installation/helm-charts/index.html b/0.5.10/server/installation/helm-charts/index.html index 3a0d703a6..2a52eb071 100644 --- a/0.5.10/server/installation/helm-charts/index.html +++ b/0.5.10/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Install Starwhale Server with Helm

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage system to save datasets, models, and others.
    • Helm 3.2.0+.

    The Starwhale Helm Charts includes MySQL and MinIO as dependencies. If you do not have your own MySQL instance or any S3-compatible object storage available, use the Helm Charts to install. Please check Installation Options to learn how to install Starwhale Server with MySQL and MinIO.

    Create a service account on Kubernetes for Starwhale Server

    If Kubernetes RBAC is enabled (in Kubernetes 1.6+, RBAC is enabled by default), Starwhale Server can not work properly unless it is started by a service account with at least the following permissions:

    ResourceAPI GroupGetListWatchCreateDelete
    jobsbatchYYYYY
    podscoreYYY
    nodescoreYYY
    events""Y

    Example:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    Downloading Starwhale Helm Charts

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    Installing Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    If you have a local kubectl command-line tool installed, you can run kubectl get pods -n starwhale to check if all pods are running.

    Updating Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    Uninstalling Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/0.5.10/server/installation/index.html b/0.5.10/server/installation/index.html index dc313e44b..7a5554e03 100644 --- a/0.5.10/server/installation/index.html +++ b/0.5.10/server/installation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.10/server/installation/minikube/index.html b/0.5.10/server/installation/minikube/index.html index a2ee2799a..677f800dc 100644 --- a/0.5.10/server/installation/minikube/index.html +++ b/0.5.10/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Install Starwhale Server with Minikube

    Prerequisites

    Starting Minikube

    minikube start --addons ingress --kubernetes-version=1.25.3

    For users in the mainland of China, please add the --image-mirror-country=cn parameter. If there is no kubectl bin in your machine, you may use minikube kubectl or the alias kubectl="minikube kubectl --" command.

    Installing Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    For users in the mainland of China, use values.minikube.cn.yaml:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    After the installation is successful, the following prompt message appears:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    Checking Starwhale Server status

    Keep checking the minikube service status until all deployments are running(waiting for 3~5 mins):

    kubectl get deployments -n starwhale
    NAMEREADYUP-TO-DATEAVAILABLEAGE
    controller1/1115m
    minio1/1115m
    mysql1/1115m

    Visiting for local

    Make the Starwhale controller accessible locally with the following command:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    Then you can visit http://controller.starwhale.svc in your local web browser.

    Visiting for others

    • Step 1: in the Starwhale Server machine

      for temporary use with socat command:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      When you kill the socat process, the share access will be blocked. iptables may be a better choice for long-term use.

    • Step 2: in the other machines

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/0.5.10/server/installation/starwhale_env/index.html b/0.5.10/server/installation/starwhale_env/index.html index 93bd85525..3f0d99c85 100644 --- a/0.5.10/server/installation/starwhale_env/index.html +++ b/0.5.10/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Server Environment Example

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The Kubernetes namespace to use when running a task
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################
    - + \ No newline at end of file diff --git a/0.5.10/server/project/index.html b/0.5.10/server/project/index.html index 095c6fc2c..0b83202ac 100644 --- a/0.5.10/server/project/index.html +++ b/0.5.10/server/project/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    Skip to main content
    Version: 0.5.10

    Project Management

    Project type

    There are two types of projects:

    • Public: Visible to anyone. Everyone on the internet can find and see public projects.

    • Private: Visible to users specified in the project member settings. Private projects can only be seen by project owners and project members. The project owner can manage access in the project setting of Manage Member.

    Create a project

    1 Sign in to Starwhale, click Create Project.

    creat

    2 Type a name for the project.

    image

    tip

    Avoid duplicate project names.For more information, see Names in Starwhale

    3 Select project visibility to decide who can find and see the project.

    image

    4 Type a description. It is optional.

    image

    5 To finish, click Submit.

    image

    Edit a project

    The name, privacy and description of a project can be edited.

    tip

    Users with the project owner or maintainer role can edit a project. For more information, see Roles and permissions

    Edit name

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter a new name for the project.

      image

      tip

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

      4 If you're editing multiple projects, repeat steps 1 through 3.

    • If you are on a specific project:

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter a new name for the project.

      image

      tip

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

    Edit privacy

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    Edit description

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    Delete a project

    1 Hover your mouse over the project you want to delete, then click the Delete button.

    image

    2 If you are sure to delete, type the exact name of the project and then click Confirm to delete the project.

    image

    Important: When you delete a project, all the models, datasets, evaluations, and runtimes belonging to the project will also be deleted and cannot be restored. Be careful about this action.

    Manage project member

    Only users with the admin role can assign people to the project. The project creator has the project owner role by default.

    Add a member to the project

    1 On the project list page or overview tab, click the Manage Member button, then Add Member.

    image

    image

    2 Type the username you want to add to the project, then click a name in the list of matches.

    image

    3 Select a project role for the member from the drop-down menu.For more information, see Roles and permissions

    image

    4 To finish, click Submit.

    image

    Remove a member

    1 On the project list page or project overview tab, click the Manage Member button.

    image

    2 Find the username you want to remove in the search box, click Remove, then Yes.

    image

    - + \ No newline at end of file diff --git a/0.5.10/swcli/config/index.html b/0.5.10/swcli/config/index.html index 130c11b8b..35bd2e8d0 100644 --- a/0.5.10/swcli/config/index.html +++ b/0.5.10/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Configuration

    Standalone Instance is installed on the user's laptop or development server, providing isolation at the level of Linux/macOS users. Users can install the Starwhale Python package using the pip command and execute any swcli command. After that, they can view their Starwhale configuration in ~/.config/starwhale/config.yaml. In the vast majority of cases, users do not need to manually modify the config.yaml file.

    The ~/.config/starwhale/config.yaml file has permissions set to 0o600 to ensure security, as it contains sensitive information such as encryption keys. Users are advised not to change the file permissions. You could customize your swcli by swcli config edit:

    swcli config edit

    config.yaml example

    The typical config.yaml file is as follows:

    • The default instance is local.
    • cloud-cn/cloud-k8s/pre-k8s are the server/cloud instances, local is the standalone instance.
    • The local storage root directory for the Standalone Instance is set to /home/liutianwei/.starwhale.
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml definition

    ParameterDescriptionTypeDefault ValueRequired
    current_instanceThe name of the default instance to use. It is usually set using the swcli instance select command.StringselfYes
    instancesManaged instances, including Standalone, Server and Cloud Instances. There must be at least one Standalone Instance named "local" and one or more Server/Cloud Instances. You can log in to a new instance with swcli instance login and log out from an instance with swcli instance logout.DictStandalone Instance named "local"Yes
    instances.{instance-alias-name}.sw_tokenLogin token for Server/Cloud Instances. It is only effective for Server/Cloud Instances. Subsequent swcli operations on Server/Cloud Instances will use this token. Note that tokens have an expiration time, typically set to one month, which can be configured within the Server/Cloud Instance.StringCloud - Yes, Standalone - No
    instances.{instance-alias-name}.typeType of the instance, currently can only be "cloud" or "standalone".Choice[string]Yes
    instances.{instance-alias-name}.uriFor Server/Cloud Instances, the URI is an http/https address. For Standalone Instances, the URI is set to "local".StringYes
    instances.{instance-alias-name}.user_nameUser's nameStringYes
    instances.{instance-alias-name}.current_projectDefault Project under the current instance. It will be used to fill the "project" field in the URI representation by default. You can set it using the swcli project select command.StringYes
    instances.{instance-alias-name}.user_roleUser's role.StringnormalYes
    instances.{instance-alias-name}.updated_atThe last updated time for this instance configuration.Time format stringYes
    storageSettings related to local storage.DictYes
    storage.rootThe root directory for Standalone Instance's local storage. Typically, if there is insufficient space in the home directory and you manually move data files to another location, you can modify this field.String~/.starwhaleYes
    versionThe version of config.yaml, currently only supports 2.0.String2.0Yes

    You could put starwhale.Link to your assets while the URI in the Link could be whatever(only s3 like or http is implemented) you need, such as s3://10.131.0.1:9000/users/path. However, Links may need to be authed, you could config the auth info in link_auths.

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    Items in link_auths will match the uri in Links automatically. An s3 typed link_auth matches Links by looking up bucket and endpoint.

    - + \ No newline at end of file diff --git a/0.5.10/swcli/index.html b/0.5.10/swcli/index.html index 84b84b30f..0f844694e 100644 --- a/0.5.10/swcli/index.html +++ b/0.5.10/swcli/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Client (swcli) User Guide

    The Starwhale Client (swcli) is a command-line tool that enables you to interact with Starwhale instances. You can use swcli to complete almost all tasks in Starwhale. swcli is written in pure python3 (requires Python 3.7 ~ 3.11) so that it can be easily installed by the pip command. Currently, swcli only supports Linux and macOS; Windows support is coming soon.

    - + \ No newline at end of file diff --git a/0.5.10/swcli/installation/index.html b/0.5.10/swcli/installation/index.html index a2c3d2ea7..420bf1b62 100644 --- a/0.5.10/swcli/installation/index.html +++ b/0.5.10/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Installation Guide

    We can use swcli to complete all tasks for Starwhale Instances. swcli is written in pure python3, which can be installed easily by the pip command. Here are some installation tips that can help you get a cleaner, unambiguous, no dependency conflicts swcli python environment.

    Installing Advice

    DO NOT install Starwhale in your system's global Python environment. It will cause a python dependency conflict problem.

    Prerequisites

    • Python 3.7 ~ 3.11
    • Linux or macOS
    • Conda (optional)

    In the Ubuntu system, you can run the following commands:

    sudo apt-get install python3 python3-venv python3-pip

    #If you want to install multi python versions
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli works on macOS. If you run into issues with the default system Python3 on macOS, try installing Python3 through the homebrew:

    brew install python3

    Install swcli

    Install with venv

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    Install with conda

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    👏 Now, you can use swcli in the global environment.

    Install for the special scenarios

    # for Audio processing
    python -m pip install starwhale[audio]

    # for Image processing
    python -m pip install starwhale[pillow]

    # for swcli model server command
    python -m pip install starwhale[server]

    # for built-in online serving
    python -m pip install starwhale[online-serve]

    # install all dependencies
    python -m pip install starwhale[all]

    Update swcli

    #for venv
    python3 -m pip install --upgrade starwhale

    #for conda
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    Uninstall swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/0.5.10/swcli/swignore/index.html b/0.5.10/swcli/swignore/index.html index 885d67e33..731391cb1 100644 --- a/0.5.10/swcli/swignore/index.html +++ b/0.5.10/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    About the .swignore file

    The .swignore file is similar to .gitignore, .dockerignore, and other files used to define ignored files or dirs. The .swignore files mainly used in the Starwhale Model building process. By default, the swcli model build command or starwhale.model.build() Python SDK will traverse all files in the specified directory and automatically exclude certain known files or directories that are not suitable for inclusion in the model package.

    PATTERN FORMAT

    • Each line in a swignore file specifies a pattern, which matches files and directories.
    • A blank line matches no files, so it can serve as a separator for readability.
    • An asterisk * matches anything except a slash.
    • A line starting with # serves as a comment.
    • Support wildcard expression, for example: *.jpg, *.png.

    Auto Ignored files or dirs

    If you want to include the auto ignored files or dirs, you can add --add-all for the swcli model build command.

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv installation dir
    • conda installation dir

    Example

    Here is the .swignore file used in the MNIST example:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/0.5.10/swcli/uri/index.html b/0.5.10/swcli/uri/index.html index 1752a92e7..b385fe253 100644 --- a/0.5.10/swcli/uri/index.html +++ b/0.5.10/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.10

    Starwhale Resources URI

    tip

    Resource URI is widely used in Starwhale client commands. The URI can refer to a resource in the local instance or any other resource in a remote instance. In this way, the Starwhale client can easily manipulate any resource.

    concepts-org.jpg

    Instance URI

    Instance URI can be either:

    • local: standalone instance.
    • [http(s)://]<hostname or ip>[:<port>]: cloud instance with HTTP address.
    • [cloud://]<cloud alias>: cloud or server instance with an alias name, which can be configured in the instance login phase.
    caution

    "local" is different from "localhost". The former means the local standalone instance without a controller, while the latter implies a controller listening at the default port 8082 on the localhost.

    Example:

    # log in Starwhale Cloud; the alias is swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.ai --alias swcloud

    # copy a model from the local instance to the cloud instance
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # copy a runtime to a Starwhale Server instance: http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    Project URI

    Project URI is in the format [<Instance URI>/project/]<project name>. If the instance URI is not specified, use the current instance instead.

    Example:

    swcli project select self   # select the self project in the current instance
    swcli project info local/project/self # inspect self project info in the local instance

    Model/Dataset/Runtime URI

    • Model URI: [<Project URI>/model/]<model name>[/version/<version id|tag>].
    • Dataset URI: [<Project URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • Runtime URI: [<Project URI>/runtime/]<runtime name>[/version/<version id|tag>].
    tip
    • swcli supports human-friendly short version id. You can type the first few characters of the version id, provided it is at least four characters long and unambiguous. However, the recover command must use the complete version id.
    • If the project URI is not specified, the default project will be used.
    • You can always use the version tag instead of the version id.

    Example:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # inspect model info, model name: mnist, version:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # short version
    swcli model info mnist # inspect mnist model info
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # use the default latest tag

    Job URI

    • format: [<Project URI>/job/]<job id>.
    • If the project URI is not specified, the default project will be used.

    Example:

    swcli job info mezdayjzge3w   # Inspect mezdayjzge3w version in default instance and default project
    swcli job info local/project/self/job/mezday # Inspect the local instance, self project, with short job id:mezday

    The default instance

    When the instance part of a project URI is omitted, the default instance is used instead. The default instance is the one selected by the swcli instance login or swcli instance use command.

    The default project

    When the project parts of Model/Dataset/Runtime/Evaluation URIs are omitted, the default project is used instead. The default project is the one selected by the swcli project use command.

    - + \ No newline at end of file diff --git a/0.5.12/cloud/billing/bills/index.html b/0.5.12/cloud/billing/bills/index.html index fd2f71b80..6248fcd6d 100644 --- a/0.5.12/cloud/billing/bills/index.html +++ b/0.5.12/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/cloud/billing/index.html b/0.5.12/cloud/billing/index.html index 4f0305215..aa362a017 100644 --- a/0.5.12/cloud/billing/index.html +++ b/0.5.12/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/cloud/billing/recharge/index.html b/0.5.12/cloud/billing/recharge/index.html index 01e178c6c..c72f38d1c 100644 --- a/0.5.12/cloud/billing/recharge/index.html +++ b/0.5.12/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/cloud/billing/refund/index.html b/0.5.12/cloud/billing/refund/index.html index 91f5aa51e..35fa0c242 100644 --- a/0.5.12/cloud/billing/refund/index.html +++ b/0.5.12/cloud/billing/refund/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/cloud/billing/voucher/index.html b/0.5.12/cloud/billing/voucher/index.html index cbf367558..760afc071 100644 --- a/0.5.12/cloud/billing/voucher/index.html +++ b/0.5.12/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/cloud/index.html b/0.5.12/cloud/index.html index 056416566..68a7417bb 100644 --- a/0.5.12/cloud/index.html +++ b/0.5.12/cloud/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Cloud User Guide

    Starwhale Cloud is a service hosted on public cloud and operated by the Starwhale team. The access url is https://cloud.starwhale.cn.

    - + \ No newline at end of file diff --git a/0.5.12/community/contribute/index.html b/0.5.12/community/contribute/index.html index 4feda6850..1396a1f2f 100644 --- a/0.5.12/community/contribute/index.html +++ b/0.5.12/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Contribute to Starwhale

    Getting Involved/Contributing

    We welcome and encourage all contributions to Starwhale, including and not limited to:

    • Describe the problems encountered during use.
    • Submit feature request.
    • Discuss in Slack and Github Issues.
    • Code Review.
    • Improve docs, tutorials and examples.
    • Fix Bug.
    • Add Test Case.
    • Code readability and code comments to improve readability.
    • Develop new features.
    • Write enhancement proposal.

    You can get involved, get updates and contact Starwhale developers in the following ways:

    Starwhale Resources

    Code Structure

    • client: swcli and Python SDK with Pure Python3, which includes all Standalone Instance features.
      • api: Python SDK.
      • cli: Command Line Interface entrypoint.
      • base: Python base abstract.
      • core: Starwhale core concepts which includes Dataset,Model,Runtime,Project, job and Evaluation, etc.
      • utils: Python utilities lib.
    • console: frontend with React + TypeScript.
    • server: Starwhale Controller with Java, which includes all Starwhale Cloud Instance backend APIs.
    • docker: Helm Charts, Dockerfiles.
    • docs: Starwhale official documentation.
    • example: Example code.
    • scripts: Bash and Python scripts for E2E testing and software releases, etc.

    Fork and clone the repository

    You will need to fork the code of Starwhale repository and clone it to your local machine.

    • Fork Starwhale repository: Fork the Starwhale GitHub repo. For more usage details, please refer to: Fork a repo

    • Install Git-LFS:Git-LFS

       git lfs install
    • Clone code to local machine

      git clone https://github.com/${your username}/starwhale.git

    Development environment for Standalone Instance

    Standalone Instance is written in Python3. When you want to modify swcli and sdk, you need to build the development environment.

    Standalone development environment prerequisites

    • OS: Linux or macOS
    • Python: 3.7~3.11
    • Docker: >=19.03(optional)
    • Python isolated env tools:Python venv, virtualenv or conda, etc

    Building from source code

    Based on the previous step, clone to the local directory: starwhale, and enter the client subdirectory:

    cd starwhale/client

    Create an isolated python environment with conda:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    Install client package and python dependencies into the starwhale-dev environment:

    make install-sw
    make install-dev-req

    Validate with the swcli --version command. In the development environment, the version is 0.0.0.dev0:

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ which swcli
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    Modifying the code

    When you modify the code, you do not need to install the python package again (no need to rerun the make install-sw command). The .editorconfig file is recognized by most IDEs and code editors, which helps maintain consistent coding styles across multiple developers.

    Lint and Test

    Run unit test, E2E test, mypy lint, flake lint and isort check in the starwhale directory.

    make client-all-check

    Development environment for Cloud Instance

    Cloud Instance is written in Java(backend) and React+TypeScript(frontend).

    Development environment for Console

    Development environment for Server

    • Language: Java
    • Build tool: Maven
    • Development framework: Spring Boot+Mybatis
    • Unit test framework:Junit5
      • Mockito used for mocking
      • Hamcrest used for assertion
      • Testcontainers used for providing lightweight, throwaway instances of common databases, Selenium web browsers that can run in a Docker container.
    • Check style tool:use maven-checkstyle-plugin

    Server development environment prerequisites

    • OS: Linux, macOS or Windows
    • Docker: >=19.03
    • JDK: >=11
    • Maven: >=3.8.1
    • Mysql: >=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(If you don't have a k8s cluster, you can use Minikube as an alternative for development and debugging)

    Modify the code and add unit tests

    Now you can enter the corresponding module to modify and adjust the code on the server side. The main business code directory is src/main/java, and the unit test directory is src/test/java.

    Execute code check and run unit tests

    cd starwhale/server
    mvn clean test

    Deploy the server at local machine

    • Dependent services that need to be deployed

      • Minikube (optional): Minikube can be used when there is no k8s cluster. See the installation doc: Minikube

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • Package server program

      If you need to deploy the front-end at the same time when deploying the server, you can execute the build command of the front-end part first, and then execute 'mvn clean package', and the compiled front-end files will be automatically packaged.

      Use the following command to package the program

        cd starwhale/server
      mvn clean package
    • Specify the environment required for server startup

      # Minio env
      export SW_STORAGE_ENDPOINT=http://${Minio IP,default is:127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,default is:starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,default is:starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,default is:starwhale}
      export SW_STORAGE_REGION=${Minio region,default is:local}
      # kubernetes env
      export KUBECONFIG=${the '.kube' file path}/.kube/config

      export SW_INSTANCE_URI=http://${Server IP}:8082
      export SW_METADATA_STORAGE_IP=${Mysql IP,default: 127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,default: 3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,default: starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,default: starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,default: starwhale}
    • Deploy server service

      You can use the IDE or the command to deploy.

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • Debug

      there are two ways to debug the modified function:

      • Use swagger-ui for interface debugging, visit /swagger-ui/index.html to find the corresponding api
      • Debug the corresponding function directly in the ui (provided that the front-end code has been built in advance according to the instructions when packaging)
    - + \ No newline at end of file diff --git a/0.5.12/concepts/index.html b/0.5.12/concepts/index.html index 63be2f88c..9de2ebe72 100644 --- a/0.5.12/concepts/index.html +++ b/0.5.12/concepts/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/concepts/names/index.html b/0.5.12/concepts/names/index.html index 9539e5c01..313c39e0f 100644 --- a/0.5.12/concepts/names/index.html +++ b/0.5.12/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Names in Starwhale

    Names mean project names, model names, dataset names, runtime names, and tag names.

    Names Limitation

    • Names are case-insensitive.
    • A name MUST only consist of letters A-Z a-z, digits 0-9, the hyphen character -, the dot character ., and the underscore character _.
    • A name should always start with a letter or the _ character.
    • The maximum length of a name is 80.

    Names uniqueness requirement

    • The resource name should be a unique string within its owner. For example, the project name should be unique in the owner instance, and the model name should be unique in the owner project.
    • The resource name can not be used by any other resource of the same kind in their owner, including those removed ones. For example, Project "apple" can not have two models named "Alice", even if one of them is already removed.
    • Different kinds of resources can have the same name. For example, a project and a model can be called "Alice" simultaneously.
    • Resources with different owners can have the same name. For example, a model in project "Apple" and a model in project "Banana" can have the same name "Alice".
    • Garbage-collected resources' names can be reused. For example, after the model with the name "Alice" in project "Apple" is removed and garbage collected, the project can have a new model with the same name "Alice".
    - + \ No newline at end of file diff --git a/0.5.12/concepts/project/index.html b/0.5.12/concepts/project/index.html index 092809292..9813f960b 100644 --- a/0.5.12/concepts/project/index.html +++ b/0.5.12/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Project in Starwhale

    "Project" is the basic unit for organizing different resources like models, datasets, etc. You may use projects for different purposes. For example, you can create a project for a data scientist team, a product line, or a specific model. Users usually work on one or more projects in their daily lives.

    Starwhale Server/Cloud projects are grouped by accounts. Starwhale Standalone does not have accounts. So you will not see any account name prefix in Starwhale Standalone projects. Starwhale Server/Cloud projects can be either "public" or "private". Public projects means all users on the same instance are assigned a "guest" role to the project by default. For more information about roles, see Roles and permissions in Starwhale.

    A self project is created automatically and configured as the default project in Starwhale Standalone.

    - + \ No newline at end of file diff --git a/0.5.12/concepts/roles-permissions/index.html b/0.5.12/concepts/roles-permissions/index.html index 83d5a5b1c..10a93c917 100644 --- a/0.5.12/concepts/roles-permissions/index.html +++ b/0.5.12/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Roles and permissions in Starwhale

    Roles are used to assign permissions to users. Only Starwhale Server/Cloud has roles and permissions, and Starwhale Standalone does not. The Administrator role is automatically created and assigned to the user "admin". Some sensitive operations can only be performed by users with the Administrator role, for example, creating accounts in Starwhale Server.

    Projects have three roles:

    • Admin - Project administrators can read and write project data and assign project roles to users.
    • Maintainer - Project maintainers can read and write project data.
    • Guest - Project guests can only read project data.
    ActionAdminMaintainerGuest
    Manage project membersYes
    Edit projectYesYes
    View projectYesYesYes
    Create evaluationsYesYes
    Remove evaluationsYesYes
    View evaluationsYesYesYes
    Create datasetsYesYes
    Update datasetsYesYes
    Remove datasetsYesYes
    View datasetsYesYesYes
    Create modelsYesYes
    Update modelsYesYes
    Remove modelsYesYes
    View modelsYesYesYes
    Create runtimesYesYes
    Update runtimesYesYes
    Remove runtimesYesYes
    View runtimesYesYesYes

    The user who creates a project becomes the first project administrator. They can assign roles to other users later.

    - + \ No newline at end of file diff --git a/0.5.12/concepts/versioning/index.html b/0.5.12/concepts/versioning/index.html index 3d5ee526d..68e5ca0d4 100644 --- a/0.5.12/concepts/versioning/index.html +++ b/0.5.12/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Resource versioning in Starwhale

    • Starwhale manages the history of all models, datasets, and runtimes. Every update to a specific resource appends a new version of the history.
    • Versions are identified by a version id which is a random string generated automatically by Starwhale and are ordered by their creation time.
    • Versions can have tags. Starwhale uses version tags to provide a human-friendly representation of versions. By default, Starwhale attaches a default tag to each version. The default tag is the letter "v", followed by a number. For each versioned resource, the first version tag is always tagged with "v0", the second version is tagged with "v1", and so on. And there is a special tag "latest" that always points to the last version. When a version is removed, its default tag will not be reused. For example, there is a model with tags "v0, v1, v2". When "v2" is removed, tags will be "v0, v1". And the following tag will be "v3" instead of "v2" again. You can attach your own tags to any version and remove them at any time.
    • Starwhale uses a linear history model. There is neither branch nor cycle in history.
    • History can not be rollback. When a version is to be reverted, Starwhale clones the version and appends it as a new version to the end of the history. Versions in history can be manually removed and recovered.
    - + \ No newline at end of file diff --git a/0.5.12/dataset/index.html b/0.5.12/dataset/index.html index c17b8d4bf..dee167535 100644 --- a/0.5.12/dataset/index.html +++ b/0.5.12/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Dataset User Guide

    Design Overview

    Starwhale Dataset Positioning

    The Starwhale Dataset contains three core stages: data construction, data loading, and data visualization. It is a data management tool for the ML/DL field. Starwhale Dataset can directly use the environment built by Starwhale Runtime, and can be seamlessly integrated with Starwhale Model and Starwhale Evaluation. It is an important part of the Starwhale MLOps toolchain.

    According to the classification of MLOps Roles in Machine Learning Operations (MLOps): Overview, Definition, and Architecture, the three stages of Starwhale Dataset target the following user groups:

    • Data construction: Data Engineer, Data Scientist
    • Data loading: Data Scientist, ML Developer
    • Data visualization: Data Engineer, Data Scientist, ML Developer

    mlops-users

    Core Functions

    • Efficient loading: The original dataset files are stored in external storage such as OSS or NAS, and are loaded on demand without having to save to disk.
    • Simple construction: Supports one-click dataset construction from Image/Video/Audio directories, json files and Huggingface datasets, and also supports writing Python code to build completely custom datasets.
    • Versioning: Can perform version tracking, data append and other operations, and avoid duplicate data storage through the internally abstracted ObjectStore.
    • Sharing: Implement bidirectional dataset sharing between Standalone instances and Cloud/Server instances through the swcli dataset copy command.
    • Visualization: The web interface of Cloud/Server instances can present multi-dimensional, multi-type data visualization of datasets.
    • Artifact storage: The Standalone instance can store locally built or distributed swds series files, while the Cloud/Server instance uses object storage to provide centralized swds artifact storage.
    • Seamless Starwhale integration: Starwhale Dataset can use the runtime environment built by Starwhale Runtime to build datasets. Starwhale Evaluation and Starwhale Model can directly specify the dataset through the --dataset parameter to complete automatic data loading, which facilitates inference, model evaluation and other environments.

    Key Elements

    • swds virtual package file: swds is different from swmp and swrt. It is not a single packaged file, but a virtual concept that specifically refers to a directory that contains dataset-related files for a version of the Starwhale dataset, including _manifest.yaml, dataset.yaml, dataset build Python scripts, and data file links, etc. You can use the swcli dataset info command to view where the swds is located. swds is the abbreviation of Starwhale Dataset.

    swds-tree.png

    • swcli dataset command line: A set of dataset-related commands, including construction, distribution and management functions. See CLI Reference for details.
    • dataset.yaml configuration file: Describes the dataset construction process. It can be completely omitted and specified through swcli dataset build parameters. dataset.yaml can be considered as a configuration file representation of the swcli dataset build command line parameters. swcli dataset build parameters take precedence over dataset.yaml.
    • Dataset Python SDK: Includes data construction, data loading, and several predefined data types. See Python SDK for details.
    • Python scripts for dataset construction: A series of scripts written using the Starwhale Python SDK to build datasets.

    Best Practices

    The construction of Starwhale Dataset is performed independently. If third-party libraries need to be introduced when writing construction scripts, using Starwhale Runtime can simplify Python dependency management and ensure reproducible dataset construction. The Starwhale platform will build in as many open source datasets as possible for users to copy datasets for immediate use.

    Command Line Grouping

    The Starwhale Dataset command line can be divided into the following stages from the perspective of usage phases:

    • Construction phase
      • swcli dataset build
    • Visualization phase
      • swcli dataset diff
      • swcli dataset head
    • Distribution phase
      • swcli dataset copy
    • Basic management
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    Currently, the Web UI in the Cloud/Server instance can visually display the dataset. Only DataTypes created with the Python SDK can be correctly interpreted by the frontend, with mappings as follows:

    • Image: Display thumbnails, enlarged images, MASK type images, support image/png, image/jpeg, image/webp, image/svg+xml, image/gif, image/apng, image/avif formats.
    • Audio: Displayed as an audio wave graph, playable, supports audio/mp3 and audio/wav formats.
    • Video: Displayed as a video, playable, supports video/mp4, video/avi and video/webm formats.
    • GrayscaleImage: Display grayscale images, support x/grayscale format.
    • Text: Display text, support text/plain format, set encoding format, default is utf-8.
    • Binary and Bytes: Not supported for display currently.
    • Link: The above multimedia types all support specifying links as storage paths.

    Starwhale Dataset Data Format

    The dataset consists of multiple rows, each row being a sample, each sample containing several features. The features have a dict-like structure with some simple restrictions [L]:

    • The dict keys must be str type.
    • The dict values must be Python basic types like int/float/bool/str/bytes/dict/list/tuple, or Starwhale built-in data types.
    • For the same key across different samples, the value types do not need to stay the same.
    • If the value is a list or tuple, the element data types must be consistent.
    • For dict values, the restrictions are the same as [L].

    Example:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    File Data Handling

    Starwhale Dataset handles file type data in a special way. You can ignore this section if you don't care about Starwhale's implementation.

    According to actual usage scenarios, Starwhale Dataset has two ways of handling file class data that is based on the base class starwhale.BaseArtifact:

    • swds-bin: Starwhale merges the data into several large files in its own binary format (swds-bin), which can efficiently perform indexing, slicing and loading.
    • remote-link: If the user's original data is stored in some external storage such as OSS or NAS, with a lot of original data that is inconvenient to move or has already been encapsulated by some internal dataset implementation, then you only need to use links in the data to establish indexes.

    In the same Starwhale dataset, two types of data can be included simultaneously.

    - + \ No newline at end of file diff --git a/0.5.12/dataset/yaml/index.html b/0.5.12/dataset/yaml/index.html index 7d433b81b..203fbc4f6 100644 --- a/0.5.12/dataset/yaml/index.html +++ b/0.5.12/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    The dataset.yaml Specification

    tip

    dataset.yaml is optional for the swcli dataset build command.

    Building Starwhale Dataset uses dataset.yaml. Omitting dataset.yaml allows describing related configurations in swcli dataset build command line parameters. dataset.yaml can be considered as a file-based representation of the build command line configuration.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale DatasetYesString
    handlerImportable address of a class that inherits starwhale.SWDSBinBuildExecutor, starwhale.UserRawBuildExecutor or starwhale.BuildExecutor, or a function that returns a Generator or iterable object. Format is {module path}:{class name\|function name}YesString
    descDataset descriptionNoString""
    versiondataset.yaml format version, currently only "1.0" is supportedNoString1.0
    attrDataset build parametersNoDict
    attr.volume_sizeSize of each data file in the swds-bin dataset. Can be a number in bytes, or a number plus unit like 64M, 1GB etc.NoInt or Str64MB
    attr.alignment_sizeData alignment size of each data block in the swds-bin dataset. If set to 4k, and a data block is 7.9K, 0.1K padding will be added to make the block size a multiple of alignment_size, improving page size and read efficiency.NoInteger or String128

    Examples

    Simplest Example

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    The helloworld dataset uses the ExampleProcessExecutor class in dataset.py of the dataset.yaml directory to build data.

    MNIST Dataset Build Example

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor
    desc: MNIST data and label test dataset
    attr:
    alignment_size: 128
    volume_size: 4M

    Example with handler as a generator function

    dataset.yaml contents:

    name: helloworld
    handler: dataset:iter_item

    dataset.py contents:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}
    - + \ No newline at end of file diff --git a/0.5.12/evaluation/heterogeneous/node-able/index.html b/0.5.12/evaluation/heterogeneous/node-able/index.html index 9fd602171..a733c7336 100644 --- a/0.5.12/evaluation/heterogeneous/node-able/index.html +++ b/0.5.12/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -23,7 +23,7 @@ Refer to the link.

    Take v0.13.0-rc.1 as an example:

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    Note: This operation will run the NVIDIA device plugin plugin on all Kubernetes nodes. If configured before, it will be updated. Please evaluate the image version used carefully.

  • Confirm GPU can be discovered and used in the cluster. Refer to the command below. Check that nvidia.com/gpu is in the Capacity of the Jetson node. The GPU is then recognized normally by the Kubernetes cluster.

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • Build and Use Custom Images

    The l4t-jetpack image mentioned earlier can meet our general use. If we need to customize a more streamlined image or one with more features, we can make it based on l4t-base. Relevant Dockerfiles can refer to the image Starwhale made for mnist.

    - + \ No newline at end of file diff --git a/0.5.12/evaluation/heterogeneous/virtual-node/index.html b/0.5.12/evaluation/heterogeneous/virtual-node/index.html index 70bed8ff0..f2b28725c 100644 --- a/0.5.12/evaluation/heterogeneous/virtual-node/index.html +++ b/0.5.12/evaluation/heterogeneous/virtual-node/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Virtual Kubelet as Kubernetes nodes

    Introduction

    Virtual Kubelet is an open source framework that can simulate a K8s node by mimicking the communication between kubelet and the K8s cluster.

    This solution is widely used by major cloud vendors for serverless container cluster solutions, such as Alibaba Cloud's ASK, Amazon's AWS Fargate, etc.

    Principles

    The virtual kubelet framework implements the related interfaces of kubelet for Node. With simple configuration, it can simulate a node.

    We only need to implement the PodLifecycleHandler interface to support:

    • Create, update, delete Pod
    • Get Pod status
    • Get Container logs

    Adding Devices to the Cluster

    If our device cannot serve as a K8s node due to resource constraints or other situations, we can manage these devices by using virtual kubelet to simulate a proxy node.

    The control flow between Starwhale Controller and the device is as follows:


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    Virtual kubelet converts the Pod orchestration information sent by Starwhale Controller into control behaviors for the device, such as executing a command via ssh on the device, or sending a message via USB or serial port.

    Below is an example of using virtual kubelet to control a device not joined to the cluster that is SSH-enabled:

    1. Prepare certificates
    • Create file csr.conf with the following content:
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name

    [req_distinguished_name]

    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names

    [alt_names]
    IP = 1.2.3.4
    • Generate the certificate:
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • Submit the certificate:
    cat vklet.csr| base64 | tr -d "\n" # output as content of spec.request in csr.yaml

    csr.yaml:

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
    kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    Now we have vklet-cert.pem.

    • Compile virtual kubelet:
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    Create the node configuration file mock.json:

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    Start virtual kubelet:

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig
    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    Now we have simulated a node with 100 cores + 100GB memory using virtual kubelet.

    • Add PodLifecycleHandler implementation to convert important information in Pod orchestration into ssh command execution, and collect logs for Starwhale Controller to collect.

    See ssh executor for a concrete implementation.

    - + \ No newline at end of file diff --git a/0.5.12/evaluation/index.html b/0.5.12/evaluation/index.html index 3a1b2ce71..4d2a15753 100644 --- a/0.5.12/evaluation/index.html +++ b/0.5.12/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Model Evaluation

    Design Overview

    Starwhale Evaluation Positioning

    The goal of Starwhale Evaluation is to provide end-to-end management for model evaluation, including creating Jobs, distributing Tasks, viewing model evaluation reports and basic management. Starwhale Evaluation is a specific application of Starwhale Model, Starwhale Dataset, and Starwhale Runtime in the model evaluation scenario. Starwhale Evaluation is part of the MLOps toolchain built by Starwhale. More applications like Starwhale Model Serving, Starwhale Training will be included in the future.

    Core Features

    • Visualization: Both swcli and the Web UI provide visualization of model evaluation results, supporting comparison of multiple results. Users can also customize logging of intermediate processes.

    • Multi-scenario Adaptation: Whether it's a notebook, desktop or distributed cluster environment, the same commands, Python scripts, artifacts and operations can be used for model evaluation. This satisfies different computational power and data volume requirements.

    • Seamless Starwhale Integration: Leverage Starwhale Runtime for the runtime environment, Starwhale Dataset as data input, and run models from Starwhale Model. Configuration is simple whether using swcli, Python SDK or Cloud/Server instance Web UI.

    Key Elements

    • swcli model run: Command line for bulk offline model evaluation.
    • swcli model serve: Command line for online model evaluation.

    Best Practices

    Command Line Grouping

    From the perspective of completing an end-to-end Starwhale Evaluation workflow, commands can be grouped as:

    • Preparation Stage
      • swcli dataset build or Starwhale Dataset Python SDK
      • swcli model build or Starwhale Model Python SDK
      • swcli runtime build
    • Evaluation Stage
      • swcli model run
      • swcli model serve
    • Results Stage
      • swcli job info
    • Basic Management
      • swcli job list
      • swcli job remove
      • swcli job recover

    Abstraction job-step-task

    • job: A model evaluation task is a job, which contains one or more steps.

    • step: A step corresponds to a stage in the evaluation process. With the default PipelineHandler, steps are predict and evaluate. For custom evaluation processes using @handler, @evaluation.predict, @evaluation.evaluate decorators, steps are the decorated functions. Steps can have dependencies, forming a DAG. A step contains one or more tasks. Tasks in the same step have the same logic but different inputs. A common approach is to split the dataset into multiple parts, with each part passed to a task. Tasks can run in parallel.

    • task: A task is the final running entity. In Cloud/Server instances, a task is a container in a Pod. In Standalone instances, a task is a Python Thread.

    The job-step-task abstraction is the basis for implementing distributed runs in Starwhale Evaluation.

    - + \ No newline at end of file diff --git a/0.5.12/faq/index.html b/0.5.12/faq/index.html index 7d81c291d..da7a6d5d9 100644 --- a/0.5.12/faq/index.html +++ b/0.5.12/faq/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/getting-started/cloud/index.html b/0.5.12/getting-started/cloud/index.html index b8e1d7dc2..e1a315ea2 100644 --- a/0.5.12/getting-started/cloud/index.html +++ b/0.5.12/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Getting started with Starwhale Cloud

    Starwhale Cloud is hosted on Aliyun with the domain name https://cloud.starwhale.cn. In the future, we will launch the service on AWS with the domain name https://cloud.starwhale.ai. It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.

    You need to install the Starwhale Client (swcli) at first.

    Sign Up for Starwhale Cloud and create your first project

    You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account.

    Then you can create a new project. In this tutorial, we will use the name demo for the project name.

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Login to the cloud instance

    swcli instance login --username <your account name> --password <your password> --alias swcloud https://cloud.starwhale.cn

    Copy the dataset, model, and runtime to the cloud instance

    swcli model copy mnist swcloud/project/<your account name>:demo
    swcli dataset copy mnist swcloud/project/<your account name>:demo
    swcli runtime copy pytorch swcloud/project/<your account name>:demo

    Run an evaluation with the web UI

    console-create-job.gif

    Congratulations! You have completed the Starwhale Cloud Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.5.12/getting-started/index.html b/0.5.12/getting-started/index.html index 26da767d3..0ef201d1e 100644 --- a/0.5.12/getting-started/index.html +++ b/0.5.12/getting-started/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Getting started

    First, you need to install the Starwhale Client (swcli), which can be done by running the following command:

    python3 -m pip install starwhale

    For more information, see the swcli installation guide.

    Depending on your instance type, there are three getting-started guides available for you:

    • Getting started with Starwhale Standalone - This guide helps you run an MNIST evaluation on your desktop PC/laptop. It is the fastest and simplest way to get started with Starwhale.
    • Getting started with Starwhale Server - This guide helps you install Starwhale Server in your private data center and run an MNIST evaluation. At the end of the tutorial, you will have a Starwhale Server instance where you can run model evaluations on and manage your datasets and models.
    • Getting started with Starwhale Cloud - This guide helps you create an account on Starwhale Cloud and run an MNIST evaluation. It is the easiest way to experience all Starwhale features.
    - + \ No newline at end of file diff --git a/0.5.12/getting-started/runtime/index.html b/0.5.12/getting-started/runtime/index.html index d3754644f..e85d50f58 100644 --- a/0.5.12/getting-started/runtime/index.html +++ b/0.5.12/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Getting Started with Starwhale Runtime

    This article demonstrates how to build a Starwhale Runtime of the Pytorch environment and how to use it. This runtime can meet the dependency requirements of the six examples in Starwhale: mnist, speech commands, nmt, cifar10, ag_news, and PennFudan. Links to relevant code: example/runtime/pytorch.

    You can learn the following things from this tutorial:

    • How to build a Starwhale Runtime.
    • How to use a Starwhale Runtime in different scenarios.
    • How to release a Starwhale Runtime.

    Prerequisites

    Run the following command to clone the example code:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch # for users in the mainland of China, use pytorch-cn-mirror instead.

    Build Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    Use Starwhale Runtime in the standalone instance

    Use Starwhale Runtime in the shell

    # Activate the runtime
    swcli runtime activate pytorch

    swcli runtime activate will download all python dependencies of the runtime, which may take a long time.

    All dependencies are ready in your python environment when the runtime is activated. It is similar to source venv/bin/activate of virtualenv or the conda activate command of conda. If you close the shell or switch to another shell, you need to reactivate the runtime.

    Use Starwhale Runtime in swcli

    # Use the runtime when building a Starwhale Model
    swcli model build . --runtime pytorch
    # Use the runtime when building a Starwhale Dataset
    swcli dataset build --yaml /path/to/dataset.yaml --runtime pytorch
    # Run a model evaluation with the runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch

    Copy Starwhale Runtime to another instance

    You can copy the runtime to a server/cloud instance, which can then be used in the server/cloud instance or downloaded by other users.

    # Copy the runtime to a server instance named 'pre-k8s'
    ❯ swcli runtime copy pytorch cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/0.5.12/getting-started/server/index.html b/0.5.12/getting-started/server/index.html index d184f6516..4e11ff52b 100644 --- a/0.5.12/getting-started/server/index.html +++ b/0.5.12/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Getting started with Starwhale Server

    Install Starwhale Server

    To install Starwhale Server, see the installation guide.

    Create your first project

    Login to the server

    Open your browser and enter your server's URL in the address bar. Login with your username(starwhale) and password(abcd1234).

    console-artifacts.gif

    Create a new project

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Copy the dataset, the model, and the runtime to the server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    Use the Web UI to run an evaluation

    Navigate to the "demo" project in your browser and create a new evaluation job.

    console-create-job.gif

    Congratulations! You have completed the Starwhale Server Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.5.12/getting-started/standalone/index.html b/0.5.12/getting-started/standalone/index.html index dfe48ae4c..6a8c9fe2d 100644 --- a/0.5.12/getting-started/standalone/index.html +++ b/0.5.12/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Getting started with Starwhale Standalone

    When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.

    We also provide a Jupyter Notebook example, you can try it in Google Colab or in your local vscode/jupyterlab.

    Downloading Examples

    Download Starwhale examples by cloning the Starwhale project via:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    To save time in the example downloading, we skip git-lfs and other commit info. We will use the ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory.

    Core Workflow

    Building a Pytorch Runtime

    Runtime example codes are in the example/runtime/pytorch directory.

    • Build the Starwhale runtime bundle:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      tip

      When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. Using the befitting pypi mirror and cache config in the ~/.pip/pip.conf file is a recommended practice.

      For users in the mainland of China, the following conf file is an option:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • Check your local Starwhale Runtime:

      swcli runtime list
      swcli runtime info pytorch

    Building a Model

    Model example codes are in the example/mnist directory.

    • Download the pre-trained model file:

      cd example/mnist
      make download-model
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-model
      cd -
    • Build a Starwhale model:

      swcli model build example/mnist --runtime pytorch
    • Check your local Starwhale models:

      swcli model list
      swcli model info mnist

    Building a Dataset

    Dataset example codes are in the example/mnist directory.

    • Download the MNIST raw data:

      cd example/mnist
      make download-data
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-data
      cd -
    • Build a Starwhale dataset:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • Check your local Starwhale dataset:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    Running an Evaluation Job

    • Create an evaluation job:

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • Check the evaluation result

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    Congratulations! You have completed the Starwhale Standalone Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.5.12/index.html b/0.5.12/index.html index 336d1bd33..dba2ac0b9 100644 --- a/0.5.12/index.html +++ b/0.5.12/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    What is Starwhale

    Overview

    Starwhale is an MLOps/LLMOps platform that makes your model creation, evaluation, and publication much easier. It aims to create a handy tool for data scientists and machine learning engineers.

    Starwhale helps you:

    • Keep track of your training/testing dataset history including data items and their labels, so that you can easily access them.
    • Manage your model packages that you can share across your team.
    • Run your models in different environments, either on an Nvidia GPU server or on an embedded device like Cherry Pi.
    • Create an online service with an interactive Web UI for your models.

    Starwhale is designed to be an open platform. You can create your own plugins to meet your requirements.

    Deployment options

    Each deployment of Starwhale is called an instance. All instances can be managed by the Starwhale Client (swcli).

    You can start using Starwhale with one of the following instance types:

    • Starwhale Standalone - Rather than a running service, Starwhale Standalone is actually a repository that resides in your local file system. It is created and managed by the Starwhale Client (swcli). You only need to install swcli to use it. Currently, each user on a single machine can have only ONE Starwhale Standalone instance. We recommend you use the Starwhale Standalone to build and test your datasets, runtime, and models before pushing them to Starwhale Server/Cloud instances.
    • Starwhale Server - Starwhale Server is a service deployed on your local server. Besides text-only results from the Starwhale Client (swcli), Starwhale Server provides Web UI for you to manage your datasets and models, evaluate your models in your local Kubernetes cluster, and review the evaluation results.
    • Starwhale Cloud - Starwhale Cloud is a managed service hosted on public clouds. By registering an account on https://cloud.starwhale.cn, you are ready to use Starwhale without needing to install, operate, and maintain your own instances. Starwhale Cloud also provides public resources for you to download, like datasets, runtimes, and models. Check the "starwhale/public" project on Starwhale Cloud for more details.

    When choosing which instance type to use, consider the following:

    Instance TypeDeployment locationMaintained byUser InterfaceScalability
    Starwhale StandaloneYour laptop or any server in your data centerNot requiredCommand lineNot scalable
    Starwhale ServerYour data centerYourselfWeb UI and command lineScalable, depends on your Kubernetes cluster
    Starwhale CloudPublic cloud, like AWS or Aliyunthe Starwhale TeamWeb UI and command lineScalable, but currently limited by the freely available resource on the cloud
    - + \ No newline at end of file diff --git a/0.5.12/model/index.html b/0.5.12/model/index.html index 3754714e8..537c44bbf 100644 --- a/0.5.12/model/index.html +++ b/0.5.12/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Model

    A Starwhale Model is a standard format for packaging machine learning models that can be used for various purposes, like model fine-tuning, model evaluation, and online serving. A Starwhale Model contains the model file, inference codes, configuration files, and any other files required to run the model.

    Create a Starwhale Model

    There are two ways to create a Starwhale Model: by swcli or by Python SDK.

    Create a Starwhale Model by swcli

    To create a Starwhale Model by swcli, you need to define a model.yaml, which describes some required information about the model package, and run the following command:

    swcli model build . --model-yaml /path/to/model.yaml

    For more information about the command and model.yaml, see the swcli reference. model.yaml is optional for model building.

    Create a Starwhale Model by Python SDK

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    Model Management

    Model Management by swcli

    CommandDescription
    swcli model listList all Starwhale Models in a project
    swcli model infoShow detail information about a Starwhale Model
    swcli model copyCopy a Starwhale Model to another location
    swcli model removeRemove a Starwhale Model
    swcli model recoverRecover a previously removed Starwhale Model

    Model Management by WebUI

    Model History

    Starwhale Models are versioned. The general rules about versions are described in Resource versioning in Starwhale.

    Model History Management by swcli

    CommandDescription
    swcli model historyList all versions of a Starwhale Model
    swcli model infoShow detail information about a Starwhale Model version
    swcli model diffCompare two versions of a Starwhale model
    swcli model copyCopy a Starwhale Model version to a new one
    swcli model removeRemove a Starwhale Model version
    swcli model recoverRecover a previously removed Starwhale Model version

    Model Evaluation

    Model Evaluation by swcli

    CommandDescription
    swcli model runCreate an evaluation with a Starwhale Model

    The Storage Format

    The Starwhale Model is a tarball file that contains the source directory.

    - + \ No newline at end of file diff --git a/0.5.12/model/yaml/index.html b/0.5.12/model/yaml/index.html index 5831cfde1..b46cba8a7 100644 --- a/0.5.12/model/yaml/index.html +++ b/0.5.12/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    The model.yaml Specification

    tip

    model.yaml is optional for swcli model build.

    When building a Starwhale Model using the swcli model build command, you can specify a yaml file that follows a specific format via the --model-yaml parameter to simplify specifying build parameters.

    Even without specifying the --model-yaml parameter, swcli model build will automatically look for a model.yaml file under the ${workdir} directory and extract parameters from it. Parameters specified on the swcli model build command line take precedence over equivalent configurations in model.yaml, so you can think of model.yaml as a file-based representation of the build command line.

    When building a Starwhale Model using the Python SDK, the model.yaml file does not take effect.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale Model, equivalent to --name parameter.NoString
    run.modulesPython Modules searched during model build, can specify multiple entry points for model execution, format is Python Importable path. Equivalent to --module parameter.YesList[String]
    run.handlerDeprecated alias of run.modules, can only specify one entry point.NoString
    versionmodel.yaml format version, currently only supports "1.0"NoString1.0
    descModel description, equivalent to --desc parameter.NoString

    Example


    name: helloworld

    run:
    modules:
    - src.evaluator

    desc: "example yaml"

    A Starwhale model named helloworld, searches for functions decorated with @evaluation.predict, @evaluation.evaluate or @handler, or classes inheriting from PipelineHandler in src/evaluator.py under ${WORKDIR} of the swcli model build command. These functions or classes will be added to the list of runnable entry points for the Starwhale model. When running the model via swcli model run or Web UI, select the corresponding entry point (handler) to run.

    model.yaml is optional, parameters defined in yaml can also be specified via swcli command line parameters.


    swcli model build . --model-yaml model.yaml

    Is equivalent to:


    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"

    - + \ No newline at end of file diff --git a/0.5.12/reference/sdk/dataset/index.html b/0.5.12/reference/sdk/dataset/index.html index 51e51ddb8..eacdceb56 100644 --- a/0.5.12/reference/sdk/dataset/index.html +++ b/0.5.12/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Dataset SDK

    dataset

    Get a starwhale.Dataset object, by creating a new dataset or loading an existing dataset.

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    Parameters

    • uri: (str or Resource, required)
      • The dataset uri or Resource object.
    • create: (str, optional)
      • The mode of dataset creating. The options are auto, empty and forbid.
        • auto mode: If the dataset already exists, creation is ignored. If it does not exist, the dataset is created automatically.
        • empty mode: If the dataset already exists, an Exception is raised; If it does not exist, an empty dataset is created. This mode ensures the creation of a new, empty dataset.
        • forbid mode: If the dataset already exists, nothing is done. If it does not exist, an Exception is raised. This mode ensures the existence of the dataset.
      • The default is auto.
    • readonly: (bool, optional)
      • For an existing dataset, you can specify the readonly=True argument to ensure the dataset is in readonly mode.
      • Default is False.

    Examples

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # returns False, the "mnist" dataset does not exist yet.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset implements the abstraction of a Starwhale dataset, and can operate on datasets in Standalone/Server/Cloud instances.

    from_huggingface

    from_huggingface is a classmethod that can convert a Huggingface dataset into a Starwhale dataset.

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • dataset name.
    • repo: (str, required)
    • subset: (str, optional)
      • The subset name. If the huggingface dataset has multiple subsets, you must specify the subset name.
    • split: (str, optional)
      • The split name. If the split name is not specified, all splits of the dataset will be built.
    • revision: (str, optional)
      • The huggingface datasets revision. The default value is main.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • cache: (bool, optional)
      • Whether to use huggingface dataset cache(download + local hf dataset).
      • The default value is True.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json is a classmethod that can convert a json text into a Starwhale dataset.

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • Dataset name.
    • json_text: (str, required)
      • A json string. The from_json function deserializes this string into Python objects to start building the Starwhale dataset.
    • field_selector: (str, optional)
      • The field from which you would like to extract dataset array items.
      • The default value is "" which indicates that the json object is an array containing all the items.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder is a classmethod that can read Image/Video/Audio data from a specified directory and automatically convert them into a Starwhale dataset. This function supports the following features:

    • It can recursively search the target directory and its subdirectories
    • Supports extracting three types of files:
      • image: Supports png/jpg/jpeg/webp/svg/apng image types. Image files will be converted to Starwhale.Image type.
      • video: Supports mp4/webm/avi video types. Video files will be converted to Starwhale.Video type.
      • audio: Supports mp3/wav audio types. Audio files will be converted to Starwhale.Audio type.
    • Each file corresponds to one record in the dataset, with the file stored in the file field.
    • If auto_label=True, the parent directory name will be used as the label for that record, stored in the label field. Files in the root directory will not be labeled.
    • If a txt file with the same name as an image/video/audio file exists, its content will be stored as the caption field in the dataset.
    • If metadata.csv or metadata.jsonl exists in the root directory, their content will be read automatically and associated to records by file path as meta information in the dataset.
      • metadata.csv and metadata.jsonl are mutually exclusive. An exception will be thrown if both exist.
      • Each record in metadata.csv and metadata.jsonl must contain a file_name field pointing to the file path.
      • metadata.csv and metadata.jsonl are optional for dataset building.
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • folder: (str|Path, required)
      • The folder path from which you would like to create this dataset.
    • kind: (str|DatasetFolderSourceType, required)
      • The dataset source type you would like to use, the choices are: image, video and audio.
      • Recursively searching for files of the specified kind in folder. Other file types will be ignored.
    • name: (str|Resource, optional)
      • The dataset name you would like to use.
      • If not specified, the name is the folder name.
    • auto_label: (bool, optional)
      • Whether to auto label by the sub-folder name.
      • The default value is True.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples ${folder-example}

    • Example for the normal function calling

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • Example for caption

      folder/dog/1.png
      folder/dog/1.txt

      1.txt content will be used as the caption of 1.png.

    • Example for metadata

      metadata.csv:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • Example for auto-labeling

      The following structure will create a dataset with 2 labels: "cat" and "dog", 4 images in total.

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

    __iter__

    __iter__ a method that iter the dataset rows.

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label and img are the features of mnist.
    print(item.features.img)

    batch_iter

    batch_iter is a method that iter the dataset rows in batch.

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    Parameters

    • batch_size: (int, optional)
      • batch size. The default value is 1.
    • drop_not_full: (bool, optional)
      • Whether the last batch of data, with a size smaller than batch_size, will be discarded.
      • The default value is False.

    Examples

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ is a method that allows retrieving certain rows of data from the dataset, with usage similar to Python dict and list types.

    from starwhale import dataset

    ds = dataset("mock-str-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-int-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ is a method that allows updating rows of data in the dataset, with usage similar to Python dicts. __setitem__ supports multi-threaded parallel data insertion.

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    Parameters

    • key: (int|str, required)
      • key is the index for each row in the dataset. The type is int or str, but a dataset only accepts one type.
    • value: (DataRow|tuple|dict, required)
      • value is the features for each row in the dataset, using a Python dict is generally recommended.

    Examples

    • Normal insertion

    Insert two rows into the test dataset, with index test and test2 respectively:

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • Parallel insertion
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ is a method to delete certain rows of data from the dataset.

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append is a method to append data to a dataset, similar to the append method for Python lists.

    • Adding features dict, each row is automatically indexed with int starting from 0 and incrementing.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • By appending the index and features dictionary, the index of each data row in the dataset will not be handled automatically.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend is a method to bulk append data to a dataset, similar to the extend method for Python lists.

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit is a method that flushes the current cached data to storage when called, and generates a dataset version. This version can then be used to load the corresponding dataset content afterwards.

    For a dataset, if some data is added without calling commit, but close is called or the process exits directly instead, the data will still be written to the dataset, just without generating a new version.

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    Parameters

    • tags: (list(str), optional)
      • tag as a list
    • message: (str, optional)
      • commit message. The default value is empty.
    • force_add_tags: (bool, optional)
      • For server/cloud instances, when adding labels to this version, if a label has already been applied to other dataset versions, you can use the force_add_tags=True parameter to forcibly add the label to this version, otherwise an exception will be thrown.
      • The default is False.
    • ignore_add_tags_errors: (bool, optional)
      • Ignore any exceptions thrown when adding labels.
      • The default is False.

    Examples

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly is a property attribute indicating if the dataset is read-only, it returns a bool value.

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version is a property attribute, string type.

    • When loading an existing dataset, the loading_version is the related dataset version.
    • When creating a non-existed dataset, the loading_version is equal to the pending_commit_version.

    pending_commit_version

    pending_commit_version is a property attribute, string type. When you call the commit function, the pending_commit_version will be recorded in the Standalone instance, Server instance or Cloud instance.

    committed_version

    committed_version is a property attribute, string type. After the commit function is called, the committed_version will come out, it is equal to the pending_commit_version. Accessing this attribute without calling commit first will raise an exception.

    remove

    remove is a method equivalent to the swcli dataset remove command, it can delete a dataset.

    def remove(self, force: bool = False) -> None:

    recover

    recover is a method equivalent to the swcli dataset recover command, it can recover a soft-deleted dataset that has not been run garbage collection.

    def recover(self, force: bool = False) -> None:

    summary

    summary is a method equivalent to the swcli dataset summary command, it returns summary information of the dataset.

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history is a method equivalent to the swcli dataset history command, it returns the history records of the dataset.

    def history(self) -> t.List[t.Dict]:

    flush

    flush is a method that flushes temporarily cached data from memory to persistent storage. The commit and close methods will automatically call flush.

    close

    close is a method that closes opened connections related to the dataset. Dataset also implements contextmanager, so datasets can be automatically closed using with syntax without needing to explicitly call close.

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head is a method to show the first n rows of a dataset, equivalent to the swcli dataset head command.

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> List[DataRow]:

    fetch_one

    fetch_one is a method to get the first record in a dataset, similar to head(n=1)[0].

    list

    list is a class method to list Starwhale datasets under a project URI, equivalent to the swcli dataset list command.

    @classmethod
    def list(
    cls,
    project_uri: Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:

    copy

    copy is a method to copy a dataset to another instance, equivalent to the swcli dataset copy command.

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • When copying a remote dataset to the local instance, this parameter can be used to set the destination Project URI.
    • force: (bool, optional)
      • Whether to forcibly overwrite the dataset if there is already one with the same version on the target instance.
      • The default value is False.
      • When the tags are already used for the other dataset version in the dest instance, you should use force option or adjust the tags.
    • mode: (str, optional)
      • Dataset copy mode, default is 'patch'. Mode choices are: 'patch', 'overwrite'.
      • patch: Patch mode, only update the changed rows and columns for the remote dataset.
      • overwrite: Overwrite mode, update records and delete extraneous rows from the remote dataset.
    • ignore_tags (List[str], optional)
      • Ignore tags when copying.
      • By default, the dataset is copied with all user custom tags.
      • latest and ^v\d+$ are the system builtin tags, they are ignored automatically.

    Examples

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch is a method that can convert a Starwhale dataset to a Pytorch torch.utils.data.Dataset, which can then be passed to torch.utils.data.DataLoader for use.

    It should be noted that the to_pytorch function returns a Pytorch IterableDataset.

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    Parameters

    • transform: (callable, optional)
      • A transform function for input data.
    • drop_index: (bool, optional)
      • Whether to drop the index column.
    • skip_default_transform: (bool, optional)
      • If transform is not set, by default the built-in Starwhale transform function will be used to transform the data. This can be disabled with the skip_default_transform parameter.

    Examples

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow is a method that can convert a Starwhale dataset to a Tensorflow tensorflow.data.Dataset.

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    Parameters

    • drop_index: (bool, optional)
      • Whether to drop the index column.

    Examples

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config is a method to set blob-related attributes in a Starwhale dataset. It needs to be called before making data changes.

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    Parameters

    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.

    Examples

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config is a method to set parameters for the Starwhale dataset loader process.

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    Parameters

    • num_workers: (int, optional)
      • The workers number for loading dataset.
      • The default value is 2.
    • cache_size: (int, optional)
      • Prefetched data rows.
      • The default value is 20.
    • field_transformer: (dict, optional)
      • features name transform dict.

    Examples

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/0.5.12/reference/sdk/evaluation/index.html b/0.5.12/reference/sdk/evaluation/index.html index a53d619fb..9e8b06149 100644 --- a/0.5.12/reference/sdk/evaluation/index.html +++ b/0.5.12/reference/sdk/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Model Evaluation SDK

    @evaluation.predict

    The @evaluation.predict decorator defines the inference process in the Starwhale Model Evaluation, similar to the map phase in MapReduce. It contains the following core features:

    • On the Server instance, require the resources needed to run.
    • Automatically read the local or remote datasets, and pass the data in the datasets one by one or in batches to the function decorated by evaluation.predict.
    • By the replicas setting, implement distributed dataset consumption to horizontally scale and shorten the time required for the model evaluation tasks.
    • Automatically store the return values of the function and the input features of the dataset into the results table, for display in the Web UI and further use in the evaluate phase.
    • The decorated function is called once for each single piece of data or each batch, to complete the inference process.

    Parameters

    • resources: (dict, optional)
      • Defines the resources required by each predict task when running on the Server instance, including mem, cpu, and nvidia.com/gpu.
      • mem: The unit is Bytes, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"mem": {"request": 100 * 1024, "limit": 200 * 1024}}.
        • If only a single number is set, the Python SDK will automatically set request and limit to the same value, e.g. resources={"mem": 100 * 1024} is equivalent to resources={"mem": {"request": 100 * 1024, "limit": 100 * 1024}}.
      • cpu: The unit is the number of CPU cores, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"cpu": {"request": 1, "limit": 2}}.
        • If only a single number is set, the SDK will automatically set request and limit to the same value, e.g. resources={"cpu": 1.5} is equivalent to resources={"cpu": {"request": 1.5, "limit": 1.5}}.
      • nvidia.com/gpu: The unit is the number of GPUs, int type is supported.
        • nvidia.com/gpu does not support setting request and limit, only a single number is supported.
      • Note: The resources parameter currently only takes effect on the Server instances. For the Cloud instances, the same can be achieved by selecting the corresponding resource pool when submitting the evaluation task. Standalone instances do not support this feature at all.
    • replicas: (int, optional)
      • The number of replicas to run predict.
      • predict defines a Step, in which there are multiple equivalent Tasks. Each Task runs on a Pod in Cloud/Server instances, and a Thread in Standalone instances.
      • When multiple replicas are specified, they are equivalent and will jointly consume the selected dataset to achieve distributed dataset consumption. It can be understood that a row in the dataset will only be read by one predict replica.
      • The default is 1.
    • batch_size: (int, optional)
      • Batch size for passing data from the dataset into the function.
      • The default is 1.
    • fail_on_error: (bool, optional)
      • Whether to interrupt the entire model evaluation when the decorated function throws an exception. If you expect some "exceptional" data to cause evaluation failures but don't want to interrupt the overall evaluation, you can set fail_on_error=False.
      • The default is True.
    • auto_log: (bool, optional)
      • Whether to automatically log the return values of the function and the input features of the dataset to the results table.
      • The default is True.
    • log_mode: (str, optional)
      • When auto_log=True, you can set log_mode to define logging the return values in plain or pickle format.
      • The default is pickle.
    • log_dataset_features: (List[str], optional)
      • When auto_log=True, you can selectively log certain features from the dataset via this parameter.
      • By default, all features will be logged.
    • needs: (List[Callable], optional)
      • Defines the prerequisites for this task to run, can use the needs syntax to implement DAG.
      • needs accepts functions decorated by @evaluation.predict, @evaluation.evaluate, and @handler.
      • The default is empty, i.e. does not depend on any other tasks.

    Input

    The decorated functions need to define some input parameters to accept dataset data, etc. They contain the following patterns:

    • data:

      • data is a dict type that can read the features of the dataset.
      • When batch_size=1 or batch_size is not set, the label feature can be read through data['label'] or data.label.
      • When batch_size is set to > 1, data is a list.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external:

      • data is a dict type that can read the features of the dataset.
      • external is also a dict, including: index, index_with_dataset, dataset_info, context and dataset_uri keys. The attributes can be used for the further fine-grained processing.
        • index: The index of the dataset row.
        • index_with_dataset: The index with the dataset info.
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo Class.
        • context: starwhale.Context Class.
        • dataset_uri: starwhale.base.uri.resource.Resource Class.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw:

      • data is a dict type that can read the features of the dataset.
      • kw is a dict that contains external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs:

      • The first argument of args list is data.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args:

      • *args does not contain external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    Examples

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "mem": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate is a decorator that defines the evaluation process in the Starwhale Model evaluation, similar to the reduce phase in MapReduce. It contains the following core features:

    • On the Server instance, apply for the resources.
    • Read the data recorded in the results table automatically during the predict phase, and pass it into the function as an iterator.
    • The evaluate phase will only run one replica, and cannot define the replicas parameter like the predict phase.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
      • In the common case, it will depend on a function decorated by @evaluation.predict.
    • use_predict_auto_log: (bool, optional)
      • Defaults to True, passes an iterator that can traverse the predict results to the function.

    Input

    • When use_predict_auto_log=True (default), pass an iterator that can traverse the predict results into the function.
      • The iterated object is a dictionary containing two keys: output and input.
        • output is the element returned by the predict stage function.
        • input is the features of the corresponding dataset during the inference process, which is a dictionary type.
    • When use_predict_auto_log=False, do not pass any parameters into the function.

    Examples

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    evaluation.log

    evaluation.log is a function that logs the certain evaluation metrics to the specific tables, which can be viewed as the Web page in the Server/Cloud instance.

    Parameters

    • category: (str, required)
      • The category of the logged record, which will be used as a suffix for the Starwhale Datastore table name.
      • Each category corresponds to a Starwhale Datastore table, with these tables isolated by evaluation task ID without affecting each other.
    • id: (str|int, required)
      • The ID of the logged record, unique within the table.
      • Only one type, either str or int, can be used as ID type in the same table.
    • metrics: (dict, required)
      • A dictionary recording metrics in key-value pairs.

    Examples

    from starwhale import evaluation

    evaluation.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation.log("ppl", "1", {"a": "test", "b": 1})

    evaluation.log_summary

    evaluation.log_summary is a function that logs the certain metrics to the summary table. The evaluation page of a Server/Cloud instance displays data from the summary table.

    Each time it is called, Starwhale automatically updates the table using the unique ID of the current evaluation as the row ID. This function can be called multiple times during an evaluation to update different columns.

    Each project has one summary table, and all evaluation jobs under that project will log their summary information into this table.

    @classmethod
    def log_summary(cls, *args: t.Any, **kw: t.Any) -> None:

    Examples

    from starwhale import evaluation

    evaluation.log_summary(loss=0.99)
    evaluation.log_summary(loss=0.99, accuracy=0.99)
    evaluation.log_summary({"loss": 0.99, "accuracy": 0.99})

    evaluation.iter

    evaluation.iter is a function that returns an iterator for reading data iteratively from certain model evaluation tables.

    @classmethod
    def iter(cls, category: str) -> t.Iterator:

    Parameters

    • category: (str, required)
      • This parameter is consistent with the meaning of the category parameter in the evaluation.log function.

    Examples

    from starwhale import evaluation

    results = [data for data in evaluation.iter("label/0")]

    @handler

    @handler is a decorator that provides the following functionalities:

    • On a Server instance, it requests the required resources to run.
    • It can control the number of replicas.
    • Multiple handlers can form a DAG through dependency relationships to control the execution workflow.
    • It can expose ports externally to run like a web handler.

    @fine_tune, @evaluation.predict and @evaluation.evaluate can be considered applications of @handler in the certain specific areas. @handler is the underlying implementation of these decorators and is more fundamental and flexible.

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
    • replicas: (int, optional)
      • Consistent with the replicas parameter definition in @evaluation.predict.
    • name: (str, optional)
      • The name displayed for the handler.
      • If not specified, use the decorated function's name.
    • expose: (int, optional)
      • The port exposed externally. When running a web handler, the exposed port needs to be declared.
      • The default is 0, meaning no port is exposed.
      • Currently only one port can be exposed.
    • require_dataset: (bool, optional)
      • Defines whether this handler requires a dataset when running.
      • If require_dataset=True, the user is required to input a dataset when creating an evaluation task on the Server/Cloud instance web page. If require_dataset=False, the user does not need to specify a dataset on the web page.
      • The default is False.

    Examples

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune is a decorator that defines the fine-tuning process for model training.

    Some restrictions and usage suggestions:

    • fine_tune has only one replica.
    • fine_tune requires dataset input.
    • Generally, the dataset is obtained through Context.get_runtime_context() at the start of fine_tune.
    • Generally, at the end of fine_tune, the fine-tuned Starwhale model package is generated through starwhale.model.build, which will be automatically copied to the corresponding evaluation project.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.

    Examples

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    The @multi_classification decorator uses the sklearn lib to analyze results for multi-classification problems, outputting the confusion matrix, ROC, AUC etc., and writing them to related tables in the Starwhale Datastore.

    When using it, certain requirements are placed on the return value of the decorated function, which should be (label, result) or (label, result, probability_matrix).

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    Parameters

    • confusion_matrix_normalize: (str, optional)
      • Accepts three parameters:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • Whether to calculate the Hamming loss.
      • The default is True.
    • show_cohen_kappa_score: (bool, optional)
      • Whether to calculate the Cohen kappa score.
      • The default is True.
    • show_roc_auc: (bool, optional)
      • Whether to calculate ROC/AUC. To calculate, the function needs to return a (label, result, probability_matrix) tuple, otherwise a (label, result) tuple is sufficient.
      • The default is True.
    • all_labels: (List, optional)
      • Defines all the labels.

    Examples


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    The PipelineHandler class provides a default model evaluation workflow definition that requires users to implement the predict and evaluate functions.

    The PipelineHandler is equivalent to using the @evaluation.predict and @evaluation.evaluate decorators together - the usage looks different but the underlying model evaluation process is the same.

    Note that PipelineHandler currently does not support defining resources parameters.

    Users need to implement the following functions:

    • predict: Defines the inference process, equivalent to a function decorated with @evaluation.predict.

    • evaluate: Defines the evaluation process, equivalent to a function decorated with @evaluation.evaluate.

    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    Parameters

    • predict_batch_size: (int, optional)
      • Equivalent to the batch_size parameter in @evaluation.predict.
      • Default is 1.
    • ignore_error: (bool, optional)
      • Equivalent to the fail_on_error parameter in @evaluation.predict.
      • Default is False.
    • predict_auto_log: (bool, optional)
      • Equivalent to the auto_log parameter in @evaluation.predict.
      • Default is True.
    • predict_log_mode: (str, optional)
      • Equivalent to the log_mode parameter in @evaluation.predict.
      • Default is pickle.
    • predict_log_dataset_features: (bool, optional)
      • Equivalent to the log_dataset_features parameter in @evaluation.predict.
      • Default is None, which records all features.

    PipelineHandler.run Decorator

    The PipelineHandler.run decorator can be used to describe resources for the predict and evaluate methods, supporting definitions of replicas and resources:

    • The PipelineHandler.run decorator can only decorate predict and evaluate methods in subclasses inheriting from PipelineHandler.
    • The predict method can set the replicas parameter. The replicas value for the evaluate method is always 1.
    • The resources parameter is defined and used in the same way as the resources parameter in @evaluation.predict or @evaluation.evaluate.
    • The PipelineHandler.run decorator is optional.
    • The PipelineHandler.run decorator only takes effect on Server and Cloud instances, not Standalone instances that don't support resource definition.
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    Examples

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    The context information passed during model evaluation, including Project, Task ID, etc. The Context content is automatically injected and can be used in the following ways:

    • Inherit the PipelineHandler class and use the self.context object.
    • Get it through Context.get_runtime_context().

    Note that Context can only be used during model evaluation, otherwise the program will throw an exception.

    Currently Context can get the following values:

    • project: str
      • Project name.
    • version: str
      • Unique ID of model evaluation.
    • step: str
      • Step name.
    • total: int
      • Total number of Tasks under the Step.
    • index: int
      • Task index number, starting from 0.
    • dataset_uris: List[str]
      • List of Starwhale dataset URIs.

    Examples


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api is a decorator that provides a simple Web Handler input definition based on Gradio for accepting external requests and returning inference results to the user when launching a Web Service with the swcli model serve command, enabling online evaluation.

    Examples

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    If you want to customize the web service implementation, you can subclass Service and override the serve method.

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    Notes:

    • Handlers added with PipelineHandler.add_api and the api decorator or Service.api can work together
    • If using a custom Service, you need to instantiate the custom Service class in the model

    Custom Request and Response

    Request and Response are handler preprocessing and postprocessing classes for receiving user requests and returning results. They can be simply understood as pre and post logic for the handler.

    Starwhale provides built-in Request implementations for Dataset types and Json Response. Users can also customize the logic as follows:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req

    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/0.5.12/reference/sdk/job/index.html b/0.5.12/reference/sdk/job/index.html index 65294c769..ec9e545dd 100644 --- a/0.5.12/reference/sdk/job/index.html +++ b/0.5.12/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Task SDK

    job

    Get a starwhale.Job object through the Job URI parameter, which represents a Job on Standalone/Server/Cloud instances.

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    Parameters

    • uri: (str, required)
      • Job URI format.

    Usage Example

    from starwhale import job

    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")

    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job abstracts Starwhale Job and enables some information retrieval operations on the job.

    list

    list is a classmethod that can list the jobs under a project.

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[List[Job], Dict]:

    Parameters

    • project: (str, optional)
      • Project URI, can be projects on Standalone/Server/Cloud instances.
      • If project is not specified, the project selected by the swcli project select command will be used.
    • page_index: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the page number.
        • Default is 1.
        • Page numbers start from 1.
      • Standalone instances do not support paging. This parameter has no effect.
    • page_size: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the number of jobs returned per page.
        • Default is 20.
      • Standalone instances do not support paging. This parameter has no effect.

    Usage Example

    from starwhale import Job

    # list jobs of current selected project
    jobs, pagination_info = Job.list()

    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")

    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get is a classmethod that gets information about a specific job and returns a Starwhale.Job object. It has the same functionality and parameter definitions as the starwhale.job function.

    Usage Example

    from starwhale import Job

    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")

    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary is a property that returns the data written to the summary table during the job execution, in dict type.

    @property
    def summary(self) -> Dict[str, Any]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.summary)

    tables

    tables is a property that returns the names of tables created during the job execution (not including the summary table, which is created automatically at the project level), in list type.

    @property
    def tables(self) -> List[str]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.tables)

    get_table_rows

    get_table_rows is a method that returns records from a data table according to the table name and other parameters, in iterator type.

    def get_table_rows(
    self,
    name: str,
    start: Any = None,
    end: Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> Iterator[Dict[str, Any]]:

    Parameters

    • name: (str, required)
      • Datastore table name. Any of the table names obtained through the tables property can be used.
    • start: (Any, optional)
      • The starting ID value of the returned records.
      • Default is None, meaning start from the beginning of the table.
    • end: (Any, optional)
      • The ending ID value of the returned records.
      • Default is None, meaning until the end of the table.
      • If both start and end are None, all records in the table will be returned as an iterator.
    • keep_none: (bool, optional)
      • Whether to return records with None values.
      • Default is False.
    • end_inclusive: (bool, optional)
      • When end is set, whether the iteration includes the end record.
      • Default is False.

    Usage Example

    from starwhale import job

    j = job("local/project/self/job/xm5wnup")

    table_name = j.tables[0]

    for row in j.get_table_rows(table_name):
    print(row)

    rows = list(j.get_table_rows(table_name, start=0, end=100))

    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]
    - + \ No newline at end of file diff --git a/0.5.12/reference/sdk/model/index.html b/0.5.12/reference/sdk/model/index.html index 048b095ce..3709026b2 100644 --- a/0.5.12/reference/sdk/model/index.html +++ b/0.5.12/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Model SDK

    model.build

    model.build is a function that can build the Starwhale model, equivalent to the swcli model build command.

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • modules: (List[str|object], optional)
      • The search modules supports object(function, class or module) or str(example: "to.path.module", "to.path.module:object").
      • If the argument is not specified, the search modules are the imported modules.
    • name: (str, optional)
      • Starwhale Model name.
      • The default is the current work dir (cwd) name.
    • workdir: (str, Pathlib.Path, optional)
      • The path of the rootdir. The default workdir is the current working dir.
      • All files in the workdir will be packaged. If you want to ignore some files, you can add .swignore file in the workdir.
    • project_uri: (str, optional)
      • The project uri of the Starwhale Model.
      • If the argument is not specified, the project_uri is the config value of swcli project select command.
    • desc: (str, optional)
      • The description of the Starwhale Model.
    • remote_project_uri: (str, optional)
      • Project URI of a remote instance. After the Starwhale model is built, it will be automatically copied to the remote instance.
    • add_all: (bool, optional)
      • Add all files in the working directory to the model package(excludes python cache files and virtual environment files when disabled).The .swignore file still takes effect.
      • The default value is False.
    • tags: (List[str], optional)
      • The tags for the model version.
      • latest and ^v\d+$ tags are reserved tags.

    Examples

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/0.5.12/reference/sdk/other/index.html b/0.5.12/reference/sdk/other/index.html index 7a7d83407..0bef1d5df 100644 --- a/0.5.12/reference/sdk/other/index.html +++ b/0.5.12/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Other SDK

    __version__

    Version of Starwhale Python SDK and swcli, string constant.

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    Initialize Starwhale logger and traceback. The default value is 0.

    • 0: show only errors, traceback only shows 1 frame.
    • 1: show errors + warnings, traceback shows 5 frames.
    • 2: show errors + warnings + info, traceback shows 10 frames.
    • 3: show errors + warnings + info + debug, traceback shows 100 frames.
    • >=4: show errors + warnings + info + debug + trace, traceback shows 1000 frames.
    def init_logger(verbose: int = 0) -> None:

    login

    Log in to a server/cloud instance. It is equivalent to running the swcli instance login command. Log in to the Standalone instance is meaningless.

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    Parameters

    • instance: (str, required)
      • The http url of the server/cloud instance.
    • alias: (str, optional)
      • An alias for the instance to simplify the instance part of the Starwhale URI.
      • If not specified, the hostname part of the instance http url will be used.
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • You can only choose one of username + password or token to login to the instance.

    Examples

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    Log out of a server/cloud instance. It is equivalent to running the swcli instance logout command. Log out of the Standalone instance is meaningless.

    def logout(instance: str) -> None:

    Examples

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/0.5.12/reference/sdk/overview/index.html b/0.5.12/reference/sdk/overview/index.html index c468e1cec..dc051d31e 100644 --- a/0.5.12/reference/sdk/overview/index.html +++ b/0.5.12/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Python SDK Overview

    Starwhale provides a series of Python SDKs to help manage datasets, models, evaluations etc. Using the Starwhale Python SDK can make it easier to complete your ML/DL development tasks.

    Classes

    • PipelineHandler: Provides default model evaluation process definition, requires implementation of predict and evaluate methods.
    • Context: Passes context information during model evaluation, including Project, Task ID etc.
    • class Dataset: Starwhale Dataset class.
    • class starwhale.api.service.Service: The base class of online evaluation.
    • class Job: Provides operations for Job.

    Functions

    • @multi_classification: Decorator for multi-class problems to simplify evaluate result calculation and storage for better evaluation presentation.
    • @handler: Decorator to define a running entity with resource attributes (mem/cpu/gpu). You can control replica count. Handlers can form DAGs through dependencies to control execution flow.
    • @evaluation.predict: Decorator to define inference process in model evaluation, similar to map phase in MapReduce.
    • @evaluation.evaluate: Decorator to define evaluation process in model evaluation, similar to reduce phase in MapReduce.
    • evaluation.log: Log evaluation metrics to the specific tables.
    • evaluation.log_summary: Log certain metrics to the summary table.
    • evaluation.iter: Iterate and read data from the certain tables.
    • model.build: Build Starwhale model.
    • @fine_tune: Decorator to define model fine-tuning process.
    • init_logger: Set log level, implement 5-level logging.
    • dataset: Get starwhale.Dataset object, by creating new datasets or loading existing datasets.
    • @starwhale.api.service.api: Decorator to provide a simple Web Handler input definition based on Gradio.
    • login: Log in to the server/cloud instance.
    • logout: Log out of the server/cloud instance.
    • job: Get starwhale.Job object by the Job URI.
    • @PipelineHandler.run: Decorator to define the resources for the predict and evaluate methods in PipelineHandler subclasses.

    Data Types

    • COCOObjectAnnotation: Provides COCO format definitions.
    • BoundingBox: Bounding box type, currently in LTWH format - left_x, top_y, width and height.
    • ClassLabel: Describes the number and types of labels.
    • Image: Image type.
    • GrayscaleImage: Grayscale image type, e.g. MNIST digit images, a special case of Image type.
    • Audio: Audio type.
    • Video: Video type.
    • Text: Text type, default utf-8 encoding, for storing large texts.
    • Binary: Binary type, stored in bytes, for storing large binary content.
    • Line: Line type.
    • Point: Point type.
    • Polygon: Polygon type.
    • Link: Link type, for creating remote-link data.
    • S3LinkAuth: When data is stored in S3-based object storage, this type describes auth and key info.
    • MIMEType: Describes multimedia types supported by Starwhale, used in mime_type attribute of Image, Video etc for better Dataset Viewer.
    • LinkType: Describes remote link types supported by Starwhale, currently LocalFS and S3.

    Other

    • __version__: Version of Starwhale Python SDK and swcli, string constant.

    Further reading

    - + \ No newline at end of file diff --git a/0.5.12/reference/sdk/type/index.html b/0.5.12/reference/sdk/type/index.html index cf8f1a3d4..20102b9be 100644 --- a/0.5.12/reference/sdk/type/index.html +++ b/0.5.12/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Data Types

    COCOObjectAnnotation

    It provides definitions following the COCO format.

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    ParameterDescription
    idObject id, usually a globally incrementing id
    image_idImage id, usually id of the image
    category_idCategory id, usually id of the class in object detection
    segmentationObject contour representation, Polygon (polygon vertices) or RLE format
    areaObject area
    bboxRepresents bounding box, can be BoundingBox type or list of floats
    iscrowd0 indicates a single object, 1 indicates two unseparated objects

    Examples

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    GrayscaleImage provides a grayscale image type. It is a special case of the Image type, for example the digit images in MNIST.

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    ParameterDescription
    fpImage path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    shapeImage width and height, default channel is 1
    as_maskWhether used as a mask image
    mask_uriURI of the original image for the mask

    Examples

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage Functions

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    BoundingBox provides a bounding box type, currently in LTWH format:

    • left_x: x-coordinate of left edge
    • top_y: y-coordinate of top edge
    • width: width of bounding box
    • height: height of bounding box

    So it represents the bounding box using the coordinates of its left, top, width and height. This is a common format for specifying bounding boxes in computer vision tasks.

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    ParameterDescription
    xx-coordinate of left edge (left_x)
    yy-coordinate of top edge (top_y)
    widthWidth of bounding box
    heightHeight of bounding box

    ClassLabel

    Describe labels.

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    Image Type.

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    ParameterDescription
    fpImage path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    shapeImage width, height and channels
    mime_typeMIMEType supported types
    as_maskWhether used as a mask image
    mask_uriURI of the original image for the mask

    The main difference from GrayscaleImage is that Image supports multi-channel RGB images by specifying shape as (W, H, C).

    Examples

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image Functions

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    Video type.

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    ParameterDescription
    fpVideo path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    mime_typeMIMEType supported types

    Examples

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    Audio type.

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    ParameterDescription
    fpAudio path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    mime_typeMIMEType supported types

    Examples

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio Functions

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    Text type, the default encode type is utf-8.

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    ParameterDescription
    contentThe text content
    encodingEncoding format of the text

    Examples

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text Functions

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    Binary provides a binary data type, stored as bytes.

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    ParameterDescription
    fpPath, IO object, or file content bytes
    mime_typeMIMEType supported types

    Binary Functions

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link provides a link type to create remote-link datasets in Starwhale.

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    ParameterDescription
    uriURI of the original data, currently supports localFS and S3 protocols
    authLink auth information
    offsetData offset relative to file pointed by uri
    sizeData size
    data_typeActual data type pointed by the link, currently supports Binary, Image, Text, Audio and Video

    Link.astype

    astype() -> Dict[str, t.Any]

    S3LinkAuth

    S3LinkAuth provides authentication and key information when data is stored on S3 protocol based object storage.

    S3LinkAuth(
    name: str = "",
    access_key: str = "",
    secret: str = "",
    endpoint: str = "",
    region: str = "local",
    )
    ParameterDescription
    nameName of the auth
    access_keyAccess key for S3 connection
    secretSecret for S3 connection
    endpointEndpoint URL for S3 connection
    regionS3 region where bucket is located, default is local.

    Examples

    import struct
    import typing as t
    from pathlib import Path

    from starwhale import (
    Link,
    S3LinkAuth,
    GrayscaleImage,
    UserRawBuildExecutor,
    )
    class LinkRawDatasetProcessExecutor(UserRawBuildExecutor):
    _auth = S3LinkAuth(name="mnist", access_key="minioadmin", secret="minioadmin")
    _endpoint = "10.131.0.1:9000"
    _bucket = "users"

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "t10k-labels-idx1-ubyte").open("rb") as label_file:
    _, label_number = struct.unpack(">II", label_file.read(8))

    offset = 16
    image_size = 28 * 28

    uri = f"s3://{self._endpoint}/{self._bucket}/dataset/mnist/t10k-images-idx3-ubyte"
    for i in range(label_number):
    _data = Link(
    f"{uri}",
    self._auth,
    offset=offset,
    size=image_size,
    data_type=GrayscaleImage(display_name=f"{i}", shape=(28, 28, 1)),
    )
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield _data, {"label": _label}
    offset += image_size

    MIMEType

    MIMEType describes the multimedia types supported by Starwhale, implemented using Python Enum. It is used in the mime_type attribute of Image, Video etc to enable better Dataset Viewer support.

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    LinkType

    LinkType describes the remote link types supported by Starwhale, also implemented using Python Enum. Currently supports LocalFS and S3 types.

    class LinkType(Enum):
    LocalFS = "local_fs"
    S3 = "s3"
    UNDEFINED = "undefined"

    Line

    from starwhale import ds, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    from starwhale import ds, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    from starwhale import ds, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/dataset/index.html b/0.5.12/reference/swcli/dataset/index.html index 60c37a0f8..a443ae978 100644 --- a/0.5.12/reference/swcli/dataset/index.html +++ b/0.5.12/reference/swcli/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    swcli dataset

    Overview

    swcli [GLOBAL OPTIONS] dataset [OPTIONS] <SUBCOMMAND> [ARGS]...

    The dataset command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • head
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • summary
    • tag

    swcli dataset build

    swcli [GLOBAL OPTIONS] dataset build [OPTIONS]

    Build Starwhale Dataset. This command only supports building standalone datasets.

    Options

    • Data sources options:
    OptionRequiredTypeDefaultsDescription
    -if or --image or --image-folderNStringBuild dataset from image folder, the folder should contain the image files.
    -af or --audio or --audio-folderNStringBuild dataset from audio folder, the folder should contain the audio files.
    -vf or --video or --video-folderNStringBuild dataset from video folder, the folder should contain the video files.
    -h or --handler or --python-handlerNStringBuild dataset from python executor handler, the handler format is [module path]:[class or func name].
    -f or --yaml or --dataset-yamlNdataset.yaml in cwdBuild dataset from dataset.yaml file. Default uses dataset.yaml in the work directory(cwd).
    -jf or --jsonNStringBuild dataset from json or jsonl file, the json or jsonl file option is a json file path or a http downloaded url. The json content structure should be a list[dict] or tuple[dict].
    -hf or --huggingfaceNStringBuild dataset from huggingface dataset, the huggingface option is a huggingface repo name.
    -c or --csvNStringBuild dataset from csv files. The option is a csv file path, dir path or a http downloaded url. The option can be used multiple times.

    Data source options are mutually exclusive, only one option is accepted. If not set, the swcli dataset build command will use dataset yaml mode to build the dataset with the dataset.yaml in the cwd.

    • Other options:
    OptionRequiredScopeTypeDefaultsDescription
    -pt or --patchone of --patch and --overwriteGlobalBooleanTruePatch mode, only update the changed rows and columns for the existed dataset.
    -ow or --overwriteone of --patch and --overwriteGlobalBooleanFalseOverwrite mode, update records and delete extraneous rows from the existed dataset.
    -n or --nameNGlobalStringDataset name
    -p or --projectNGlobalStringDefault projectProject URI, the default is the current selected project. The dataset will store in the specified project.
    -d or --descNGlobalStringDataset description
    -as or --alignment-sizeNGlobalString128Bswds-bin format dataset: alignment size
    -vs or --volume-sizeNGlobalString64MBswds-bin format dataset: volume size
    -r or --runtimeNGlobalStringRuntime URI
    -w or --workdirNPython Handler ModeStringcwdwork dir to search handler.
    --auto-label/--no-auto-labelNImage/Video/Audio Folder ModeBooleanTrueWhether to auto label by the sub-folder name.
    --field-selectorNJSON File ModeStringThe field from which you would like to extract dataset array items. The field is split by the dot(.) symbol.
    --subsetNHuggingface ModeStringHuggingface dataset subset name. If the subset name is not specified, all subsets will be built.
    --splitNHuggingface ModeStringHuggingface dataset split name. If the split name is not specified, all splits will be built.
    --revisionNHuggingface ModeStringmainVersion of the dataset script to load. Defaults to 'main'. The option value accepts tag name, or branch name, or commit hash.
    --add-hf-info/--no-add-hf-infoNHuggingface ModeBooleanTrueWhether to add huggingface dataset info to the dataset rows, currently support to add subset and split into the dataset rows. Subset uses _hf_subset field name, split uses _hf_split field name.
    --cache/--no-cacheNHuggingface ModeBooleanTrueWhether to use huggingface dataset cache(download + local hf dataset).
    -t or --tagNGlobalStringDataset tags, the option can be used multiple times.
    --encodingNCSV/JSON/JSONL ModeStringfile encoding.
    --dialectNCSV ModeStringexcelThe csv file dialect, the default is excel. Current supports excel, excel-tab and unix formats.
    --delimiterNCSV ModeString,A one-character string used to separate fields for the csv file.
    --quotecharNCSV ModeString"A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters.
    --skipinitialspace/--no-skipinitialspaceNCSV ModeBoolFalseWhether to skip spaces after delimiter for the csv file.
    --strict/--no-strictNCSV ModeBoolFalseWhen True, raise exception Error if the csv is not well formed.

    Examples for dataset building

    #- from dataset.yaml
    swcli dataset build # build dataset from dataset.yaml in the current work directory(pwd)
    swcli dataset build --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, all the involved files are related to the dataset.yaml file.
    swcli dataset build --overwrite --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, and overwrite the existed dataset.
    swcli dataset build --tag tag1 --tag tag2

    #- from handler
    swcli dataset build --handler mnist.dataset:iter_mnist_item # build dataset from mnist.dataset:iter_mnist_item handler, the workdir is the current work directory(pwd).
    # build dataset from mnist.dataset:LinkRawDatasetProcessExecutor handler, the workdir is example/mnist
    swcli dataset build --handler mnist.dataset:LinkRawDatasetProcessExecutor --workdir example/mnist

    #- from image folder
    swcli dataset build --image-folder /path/to/image/folder # build dataset from /path/to/image/folder, search all image type files.

    #- from audio folder
    swcli dataset build --audio-folder /path/to/audio/folder # build dataset from /path/to/audio/folder, search all audio type files.

    #- from video folder
    swcli dataset build --video-folder /path/to/video/folder # build dataset from /path/to/video/folder, search all video type files.

    #- from json/jsonl file
    swcli dataset build --json /path/to/example.json
    swcli dataset build --json http://example.com/example.json
    swcli dataset build --json /path/to/example.json --field-selector a.b.c # extract the json_content["a"]["b"]["c"] field from the json file.
    swcli dataset build --name qald9 --json https://raw.githubusercontent.com/ag-sc/QALD/master/9/data/qald-9-test-multilingual.json --field-selector questions
    swcli dataset build --json /path/to/test01.jsonl --json /path/to/test02.jsonl
    swcli dataset build --json https://modelscope.cn/api/v1/datasets/damo/100PoisonMpts/repo\?Revision\=master\&FilePath\=train.jsonl

    #- from huggingface dataset
    swcli dataset build --huggingface mnist
    swcli dataset build -hf mnist --no-cache
    swcli dataset build -hf cais/mmlu --subset anatomy --split auxiliary_train --revision 7456cfb

    #- from csv files
    swcli dataset build --csv /path/to/example.csv
    swcli dataset build --csv /path/to/example.csv --csv /path/to/example2.csv
    swcli dataset build --csv /path/to/csv-dir
    swcli dataset build --csv http://example.com/example.csv
    swcli dataset build --name product-desc-modelscope --csv https://modelscope.cn/api/v1/datasets/lcl193798/product_description_generation/repo\?Revision\=master\&FilePath\=test.csv --encoding=utf-8-sig

    swcli dataset copy

    swcli [GLOBAL OPTIONS] dataset copy [OPTIONS] <SRC> <DEST>

    dataset copy copies from SRC to DEST.

    SRC and DEST are both dataset URIs.

    When copying Starwhale Dataset, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -p or --patchone of --patch and --overwriteBooleanTruePatch mode, only update the changed rows and columns for the remote dataset.
    -o or --overwriteone of --patch and --overwriteBooleanFalseOverwrite mode, update records and delete extraneous rows from the remote dataset.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for dataset copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a new dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp --patch cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with a dataset name 'mnist-local'
    swcli dataset cp --overwrite cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with a new dataset name 'mnist-cloud'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli dataset cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp local/project/myproject/dataset/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli dataset cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1 --force

    swcli dataset diff

    swcli [GLOBAL OPTIONS] dataset diff [OPTIONS] <DATASET VERSION> <DATASET VERSION>

    dataset diff compares the difference between two versions of the same dataset.

    DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.
    swcli dataset head

    swcli [GLOBAL OPTIONS] dataset head [OPTIONS] <DATASET VERSION>

    Print the first n rows of the dataset. DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    -n or --rowsNInt5Print the first NUM rows of the dataset.
    -srd or --show-raw-dataNBooleanFalseFetch raw data content from objectstore.
    -st or --show-typesNBooleanFalseshow data types.

    Examples for dataset head

    #- print the first 5 rows of the mnist dataset
    swcli dataset head -n 5 mnist

    #- print the first 10 rows of the mnist(v0 version) dataset and show raw data
    swcli dataset head -n 10 mnist/v0 --show-raw-data

    #- print the data types of the mnist dataset
    swcli dataset head mnist --show-types

    #- print the remote cloud dataset's first 5 rows
    swcli dataset head cloud://cloud-cn/project/test/dataset/mnist -n 5

    #- print the first 5 rows in the json format
    swcli -o json dataset head -n 5 mnist

    swcli dataset history

    swcli [GLOBAL OPTIONS] dataset history [OPTIONS] <DATASET>

    dataset history outputs all history versions of the specified Starwhale Dataset.

    DATASET is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli dataset info

    swcli [GLOBAL OPTIONS] dataset info [OPTIONS] <DATASET>

    dataset info outputs detailed information about the specified Starwhale Dataset version.

    DATASET is a dataset URI.

    swcli dataset list

    swcli [GLOBAL OPTIONS] dataset list [OPTIONS]

    dataset list shows all Starwhale Datasets.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include datasets that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Datasets that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of datasets--filter name=mnist
    ownerKey-ValueThe dataset owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli dataset recover

    swcli [GLOBAL OPTIONS] dataset recover [OPTIONS] <DATASET>

    dataset recover recovers previously removed Starwhale Datasets or versions.

    DATASET is a dataset URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Datasets or versions can not be recovered, as well as those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Dataset or version with the same name or version id.

    swcli dataset remove

    swcli [GLOBAL OPTIONS] dataset remove [OPTIONS] <DATASET>

    dataset remove removes the specified Starwhale Dataset or version.

    DATASET is a dataset URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Datasets or versions can be recovered by swcli dataset recover before garbage collection. Use the --force option to persistently remove a Starwhale Dataset or version.

    Removed Starwhale Datasets or versions can be listed by swcli dataset list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Dataset or version. It can not be recovered.

    swcli dataset summary

    swcli [GLOBAL OPTIONS]  dataset summary <DATASET>

    Show dataset summary. DATASET is a dataset URI.

    swcli dataset tag

    swcli [GLOBAL OPTIONS] dataset tag [OPTIONS] <DATASET> [TAGS]...

    dataset tag attaches a tag to a specified Starwhale Dataset version. At the same time, the tag command also supports listing and removing tags. The tag can be used in a dataset URI instead of the version id.

    DATASET is a dataset URI.

    Each dataset version can have any number of tags, but duplicated tag names are not allowed in the same dataset.

    dataset tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another dataset version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for dataset tag

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/index.html b/0.5.12/reference/swcli/index.html index 64198956c..8e220b13d 100644 --- a/0.5.12/reference/swcli/index.html +++ b/0.5.12/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Overview

    Usage

    swcli [OPTIONS] <COMMAND> [ARGS]...
    note

    sw and starwhale are aliases for swcli.

    Global Options

    OptionDescription
    --versionShow the Starwhale Client version
    -v or --verboseShow verbose log, support multi counts for -v args. More -v args, more logs.
    --helpShow the help message.
    caution

    Global options must be put immediately after swcli, and before any command.

    Commands

    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/instance/index.html b/0.5.12/reference/swcli/instance/index.html index d5aadfa02..8e8471744 100644 --- a/0.5.12/reference/swcli/instance/index.html +++ b/0.5.12/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    swcli instance

    Overview

    swcli [GLOBAL OPTIONS] instance [OPTIONS] <SUBCOMMAND> [ARGS]

    The instance command includes the following subcommands:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [GLOBAL OPTIONS] instance info [OPTIONS] <INSTANCE>

    instance info outputs detailed information about the specified Starwhale Instance.

    INSTANCE is an instance URI.

    swcli instance list

    swcli [GLOBAL OPTIONS] instance list [OPTIONS]

    instance list shows all Starwhale Instances.

    swcli instance login

    swcli [GLOBAL OPTIONS] instance login [OPTIONS] <INSTANCE>

    instance login connects to a Server/Cloud instance and makes the specified instance default.

    INSTANCE is an instance URI.

    OptionRequiredTypeDefaultsDescription
    --usernameNStringThe login username.
    --passwordNStringThe login password.
    --tokenNStringThe login token.
    --aliasYStringThe alias of the instance. You can use it anywhere that requires an instance URI.

    --username and --password can not be used together with --token.

    swcli instance logout

    swcli [GLOBAL OPTIONS] instance logout [INSTANCE]

    instance logout disconnects from the Server/Cloud instance, and clears information stored in the local storage.

    INSTANCE is an instance URI. If it is omitted, the default instance is used instead.

    swcli instance use

    swcli [GLOBAL OPTIONS] instance use <INSTANCE>

    instance use makes the specified instance default.

    INSTANCE is an instance URI.

    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/job/index.html b/0.5.12/reference/swcli/job/index.html index c31412bda..e1d9ebafa 100644 --- a/0.5.12/reference/swcli/job/index.html +++ b/0.5.12/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    swcli job

    Overview

    swcli [GLOBAL OPTIONS] job [OPTIONS] <SUBCOMMAND> [ARGS]...

    The job command includes the following subcommands:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [GLOBAL OPTIONS] job cancel [OPTIONS] <JOB>

    job cancel stops the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job info

    swcli [GLOBAL OPTIONS] job info [OPTIONS] <JOB>

    job info outputs detailed information about the specified Starwhale Job.

    JOB is a job URI.

    swcli job list

    swcli [GLOBAL OPTIONS] job list [OPTIONS]

    job list shows all Starwhale Jobs.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --show-removed or -srNBooleanFalseIf true, include packages that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli job pause

    swcli [GLOBAL OPTIONS] job pause [OPTIONS] <JOB>

    job pause pauses the specified job. Paused jobs can be resumed by job resume. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    From Starwhale's perspective, pause is almost the same as cancel, except that the job reuses the old Job id when resumed. It is the job developer's responsibility to save all data periodically and load them when resumed. The job id is usually used as a key of the checkpoint.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job resume

    swcli [GLOBAL OPTIONS] job resume [OPTIONS] <JOB>

    job resume resumes the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/model/index.html b/0.5.12/reference/swcli/model/index.html index cea8fb09a..cd010174d 100644 --- a/0.5.12/reference/swcli/model/index.html +++ b/0.5.12/reference/swcli/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    swcli model

    Overview

    swcli [GLOBAL OPTIONS] model [OPTIONS] <SUBCOMMAND> [ARGS]...

    The model command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [GLOBAL OPTIONS] model build [OPTIONS] <WORKDIR>

    model build will put the whole WORKDIR into the model, except files that match patterns defined in .swignore.

    model build will import modules specified by --module to generate the required configurations to run the model. If your module depends on third-party libraries, we strongly recommend you use the --runtime option; otherwise, you need to ensure that the python environment used by swcli has these libraries installed.

    OptionRequiredTypeDefaultsDescription
    --project or -pNStringthe default projectthe project URI
    --model-yaml or -fNString${workdir}/model.yamlmodel yaml path, default use ${workdir}/model.yaml file. model.yaml is optional for model build.
    --module or -mNStringPython modules to be imported during the build process. Starwhale will export model handlers from these modules to the model package. This option supports set multiple times.
    --runtime or -rNStringthe URI of the Starwhale Runtime to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --name or -nNStringmodel package name
    --desc or -dNStringmodel package description
    --package-runtime/--no-package-runtimeNBooleanTrueWhen using the --runtime parameter, by default, the corresponding Starwhale runtime will become the built-in runtime for the Starwhale model. This feature can be disabled with the --no-package-runtime parameter.
    --add-allNBooleanFalseAdd all files in the working directory to the model package(excludes python cache files and virtual environment files when disabled). The .swignore file still takes effect.
    -t or --tagNGlobalString

    Examples for model build

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [GLOBAL OPTIONS] model copy [OPTIONS] <SRC> <DEST>

    model copy copies from SRC to DEST for Starwhale Model sharing.

    SRC and DEST are both model URIs.

    When copying Starwhale Model, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for model copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [GLOBAL OPTIONS] model diff [OPTIONS] <MODEL VERSION> <MODEL VERSION>

    model diff compares the difference between two versions of the same model.

    MODEL VERSION is a model URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.

    swcli model extract

    swcli [GLOBAL OPTIONS] model extract [OPTIONS] <MODEL> <TARGET_DIR>

    The model extract command can extract a Starwhale model to a specified directory for further customization.

    MODEL is a model URI.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseIf this option is used, it will forcibly overwrite existing extracted model files in the target directory.

    Examples for model extract

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [GLOBAL OPTIONS] model history [OPTIONS] <MODEL>

    model history outputs all history versions of the specified Starwhale Model.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli model info

    swcli [GLOBAL OPTIONS] model info [OPTIONS] <MODEL>

    model info outputs detailed information about the specified Starwhale Model version.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for model info

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [GLOBAL OPTIONS] model list [OPTIONS]

    model list shows all Starwhale Models.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removedNBooleanFalseIf true, include packages that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Models that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of models--filter name=mnist
    ownerKey-ValueThe model owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli model recover

    swcli [GLOBAL OPTIONS] model recover [OPTIONS] <MODEL>

    model recover recovers previously removed Starwhale Models or versions.

    MODEL is a model URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Models or versions can not be recovered, as well as those are removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Model or version with the same name or version id.

    swcli model remove

    swcli [GLOBAL OPTIONS] model remove [OPTIONS] <MODEL>

    model remove removes the specified Starwhale Model or version.

    MODEL is a model URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Models or versions can be recovered by swcli model recover before garbage collection. Use the --force option to persistently remove a Starwhale Model or version.

    Removed Starwhale Models or versions can be listed by swcli model list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Model or version. It can not be recovered.

    swcli model run

    swcli [GLOBAL OPTIONS] model run [OPTIONS]

    model run executes a model handler. Model run supports two modes to run: model URI and local development. Model URI mode needs a pre-built Starwhale Model Package. Local development mode only needs the model src dir.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringFor local development mode, the path of model src dir.
    --uri or -uNStringFor model URI mode, the string of model uri.
    --handler or -hNStringRunnable handler index or name, default is None, will use the first handler
    --module or -mNStringThe name of the Python module to import. This parameter can be set multiple times.
    --runtime or -rNStringthe Starwhale Runtime URI to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model run.
    --run-project or -pNStringDefault projectProject URI, indicates the model run results will be stored in the corresponding project.
    --dataset or -dNStringDataset URI, the Starwhale dataset required for model running. This parameter can be set multiple times.
    --in-containerNBooleanFalseUse docker container to run the model. This option is only available for standalone instances. For server and cloud instances, a docker image is always used. If the runtime is a docker image, this option is always implied.
    --forbid-snapshot or -fsNBooleanFalseIn model URI mode, each model run uses a new snapshot directory. Setting this parameter will directly use the model's workdir as the run directory. In local dev mode, this parameter does not take effect, each run is in the --workdir specified directory.
    -- --user-arbitrary-argsNStringSpecify the args you defined in your handlers.

    Examples for model run

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    swcli model serve

    swcli [GLOBAL OPTIONS] model serve [OPTIONS]

    The model serve command can run the model as a web server, and provide a simple web interaction interface.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringIn local dev mode, specify the directory of the model code.
    --uri or -uNStringIn model URI mode, specify the model URI.
    --runtime or -rNStringThe URI of the Starwhale runtime to use when running this command. If specified, the command will run in the isolated Python environment defined in the Starwhale runtime. Otherwise it will run directly in the current Python environment of swcli.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model serve.
    --module or -mNStringName of the Python module to import. This parameter can be set multiple times.
    --hostNString127.0.0.1The address for the service to listen on.
    --portNInteger8080The port for the service to listen on.

    Examples for model serve

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [GLOBAL OPTIONS] model tag [OPTIONS] <MODEL> [TAGS]...

    model tag attaches a tag to a specified Starwhale Model version. At the same time, tag command also supports list and remove tags. The tag can be used in a model URI instead of the version id.

    MODEL is a model URI.

    Each model version can have any number of tags, but duplicated tag names are not allowed in the same model.

    model tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another model version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for model tag

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/project/index.html b/0.5.12/reference/swcli/project/index.html index 8acf2391f..016d80184 100644 --- a/0.5.12/reference/swcli/project/index.html +++ b/0.5.12/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    swcli project

    Overview

    swcli [GLOBAL OPTIONS] project [OPTIONS] <SUBCOMMAND> [ARGS]...

    The project command includes the following subcommands:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [GLOBAL OPTIONS] project create <PROJECT>

    project create creates a new project.

    PROJECT is a project URI.

    swcli project info

    swcli [GLOBAL OPTIONS] project info [OPTIONS] <PROJECT>

    project info outputs detailed information about the specified Starwhale Project.

    PROJECT is a project URI.

    swcli project list

    swcli [GLOBAL OPTIONS] project list [OPTIONS]

    project list shows all Starwhale Projects.

    OptionRequiredTypeDefaultsDescription
    --instanceNStringThe URI of the instance to list. If this option is omitted, use the default instance.
    --show-removedNBooleanFalseIf true, include projects that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli project recover

    swcli [GLOBAL OPTIONS] project recover [OPTIONS] <PROJECT>

    project recover recovers previously removed Starwhale Projects.

    PROJECT is a project URI.

    Garbage-collected Starwhale Projects can not be recovered, as well as those are removed with the --force option.

    swcli project remove

    swcli [GLOBAL OPTIONS] project remove [OPTIONS] <PROJECT>

    project remove removes the specified Starwhale Project.

    PROJECT is a project URI.

    Removed Starwhale Projects can be recovered by swcli project recover before garbage collection. Use the --force option to persistently remove a Starwhale Project.

    Removed Starwhale Projects can be listed by swcli project list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Project. It can not be recovered.

    swcli project use

    swcli [GLOBAL OPTIONS] project use <PROJECT>

    project use makes the specified project the default. You must log in first to use a project on a Server/Cloud instance.

    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/runtime/index.html b/0.5.12/reference/swcli/runtime/index.html index 1c8533ced..a3d5c754f 100644 --- a/0.5.12/reference/swcli/runtime/index.html +++ b/0.5.12/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    swcli runtime

    Overview

    swcli [GLOBAL OPTIONS] runtime [OPTIONS] <SUBCOMMAND> [ARGS]...

    The runtime command includes the following subcommands:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [GLOBAL OPTIONS] runtime activate [OPTIONS] <RUNTIME>

    Like source venv/bin/activate or conda activate xxx, runtime activate sets up a new python environment according to the settings of the specified runtime. When the current shell is closed or switched to another one, you need to reactivate the runtime. RUNTIME is a Runtime URI.

    If you want to quit the activated runtime environment, please run venv deactivate in the venv environment or conda deactivate in the conda environment.

    The runtime activate command will build a Python isolated environment and download relevant Python packages according to the definition of the Starwhale runtime when activating the environment for the first time. This process may spend a lot of time.

    swcli runtime build

    swcli [GLOBAL OPTIONS] runtime build [OPTIONS]

    The runtime build command can build a shareable and reproducible runtime environment suitable for ML/DL from various environments or runtime.yaml file.

    Parameters

    • Parameters related to runtime building methods:
    OptionRequiredTypeDefaultsDescription
    -c or --condaNStringFind the corresponding conda environment by conda env name, export Python dependencies to generate Starwhale runtime.
    -cp or --conda-prefixNStringFind the corresponding conda environment by conda env prefix path, export Python dependencies to generate Starwhale runtime.
    -v or --venvNStringFind the corresponding venv environment by venv directory address, export Python dependencies to generate Starwhale runtime.
    -s or --shellNStringExport Python dependencies according to current shell environment to generate Starwhale runtime.
    -y or --yamlNruntime.yaml in cwd directoryBuild Starwhale runtime according to user-defined runtime.yaml.
    -d or --dockerNStringUse the docker image as Starwhale runtime.

    The parameters for runtime building methods are mutually exclusive, only one method can be specified. If not specified, it will use --yaml method to read runtime.yaml in cwd directory to build Starwhale runtime.

    • Other parameters:
    OptionRequiredScopeTypeDefaultsDescription
    --project or -pNGlobalStringDefault projectProject URI
    -del or --disable-env-lockNruntime.yaml modeBooleanFalseWhether to install dependencies in runtime.yaml and lock the version information of related dependencies. The dependencies will be locked by default.
    -nc or --no-cacheNruntime.yaml modeBooleanFalseWhether to delete the isolated environment and install related dependencies from scratch. By default dependencies will be installed in the existing isolated environment.
    --cudaNconda/venv/shell modeChoice[11.3/11.4/11.5/11.6/11.7/]CUDA version, CUDA will not be used by default.
    --cudnnNconda/venv/shell modeChoice[8/]cuDNN version, cuDNN will not be used by default.
    --archNconda/venv/shell modeChoice[amd64/arm64/noarch]noarchArchitecture
    -dpo or --dump-pip-optionsNGlobalBooleanFalseDump pip config options from the ~/.pip/pip.conf file.
    -dcc or --dump-condarcNGlobalBooleanFalseDump conda config from the ~/.condarc file.
    -t or --tagNGlobalStringRuntime tags, the option can be used multiple times.

    Examples for Starwhale Runtime building

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [GLOBAL OPTIONS] runtime copy [OPTIONS] <SRC> <DEST>

    runtime copy copies from SRC to DEST. SRC and DEST are both Runtime URIs.

    When copying Starwhale Runtime, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are built-in Starwhale system labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for Starwhale Runtime copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [GLOBAL OPTIONS] runtime dockerize [OPTIONS] <RUNTIME>

    runtime dockerize generates a docker image based on the specified runtime. Starwhale uses docker buildx to create the image. Docker 19.03 or later is required to run this command.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --tag or -tNStringThe tag of the docker image. This option can be repeated multiple times.
    --pushNBooleanFalseIf true, push the image to the docker registry
    --platformNStringamd64The target platform, can be either amd64 or arm64. This option can be repeated multiple times to create a multi-platform image.

    swcli runtime extract

    swcli [Global Options] runtime extract [Options] <RUNTIME>

    Starwhale runtimes are distributed as compressed packages. The runtime extract command can be used to extract the runtime package for further customization and modification.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseWhether to delete and re-extract if there is already an extracted Starwhale runtime in the target directory.
    --target-dirNStringCustom extraction directory. If not specified, it will be extracted to the default Starwhale runtime workdir. The command log will show the directory location.

    swcli runtime history

    swcli [GLOBAL OPTIONS] runtime history [OPTIONS] <RUNTIME>

    runtime history outputs all history versions of the specified Starwhale Runtime.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli runtime info

    swcli [GLOBAL OPTIONS] runtime info [OPTIONS] <RUNTIME>

    runtime info outputs detailed information about a specified Starwhale Runtime version.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/runtime_yaml/manifest/lock/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for Starwhale Runtime info

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [GLOBAL OPTIONS] runtime list [OPTIONS]

    runtime list shows all Starwhale Runtimes.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include runtimes that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Runtimes that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of runtimes--filter name=pytorch
    ownerKey-ValueThe runtime owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli runtime recover

    swcli [GLOBAL OPTIONS] runtime recover [OPTIONS] <RUNTIME>

    runtime recover can recover previously removed Starwhale Runtimes or versions.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Runtimes or versions can not be recovered, as well as those are removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Runtime or version with the same name or version id.

    swcli runtime remove

    swcli [GLOBAL OPTIONS] runtime remove [OPTIONS] <RUNTIME>

    runtime remove removes the specified Starwhale Runtime or version.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Runtimes or versions can be recovered by swcli runtime recover before garbage collection. Use the --force option to persistently remove a Starwhale Runtime or version.

    Removed Starwhale Runtimes or versions can be listed by swcli runtime list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Runtime or version. It can not be recovered.

    swcli runtime tag

    swcli [GLOBAL OPTIONS] runtime tag [OPTIONS] <RUNTIME> [TAGS]...

    runtime tag attaches a tag to a specified Starwhale Runtime version. At the same time, tag command also supports list and remove tags. The tag can be used in a runtime URI instead of the version id.

    RUNTIME is a Runtime URI.

    Each runtime version can have any number of tags, but duplicated tag names are not allowed in the same runtime.

    runtime tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseRemove the tag if true
    --quiet or -qNBooleanFalseIgnore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another runtime version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for runtime tag

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/0.5.12/reference/swcli/utilities/index.html b/0.5.12/reference/swcli/utilities/index.html index 991463df3..68b339414 100644 --- a/0.5.12/reference/swcli/utilities/index.html +++ b/0.5.12/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Utility Commands

    swcli gc

    swcli [GLOBAL OPTIONS] gc [OPTIONS]

    gc clears removed projects, models, datasets, and runtimes according to the internal garbage collection policy.

    OptionRequiredTypeDefaultsDescription
    --dry-runNBooleanFalseIf true, outputs objects to be removed instead of clearing them.
    --yesNBooleanFalseBypass confirmation prompts.

    swcli check

    swcli [GLOBAL OPTIONS] check

    Check if the external dependencies of the swcli command meet the requirements. Currently mainly checks Docker and Conda.

    swcli completion install

    swcli [GLOBAL OPTIONS] completion install <SHELL_NAME>

    Install autocompletion for swcli commands. Currently supports bash, zsh and fish. If SHELL_NAME is not specified, it will try to automatically detect the current shell type.

    swcli config edit

    swcli [GLOBAL OPTIONS] config edit

    Edit the Starwhale configuration file at ~/.config/starwhale/config.yaml.

    swcli ui

    swcli [GLOBAL OPTIONS] ui <INSTANCE>

    Open the web page for the corresponding instance.

    - + \ No newline at end of file diff --git a/0.5.12/runtime/index.html b/0.5.12/runtime/index.html index f0f1b6f6d..ff447d953 100644 --- a/0.5.12/runtime/index.html +++ b/0.5.12/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Runtime

    Overview

    Starwhale Runtime aims to provide a reproducible and sharable running environment for python programs. You can easily share your working environment with your teammates or outsiders, and vice versa. Furthermore, you can run your programs on Starwhale Server or Starwhale Cloud without bothering with the dependencies.

    Starwhale works well with virtualenv, conda, and docker. If you are using one of them, it is straightforward to create a Starwhale Runtime based on your current environment.

    Multiple Starwhale Runtimes on your local machine can be switched freely by one command. You can work on different projects without messing up the environment. Starwhale Runtime consists of two parts: the base image and the dependencies.

    The base image

    The base is a docker image with Python, CUDA, and cuDNN installed. Starwhale provides various base images for you to choose from; see the following list:

    • Computer system architecture:
      • X86 (amd64)
      • Arm (aarch64)
    • Operating system:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7

    runtime.yaml

    runtime.yaml is the core configuration file of Starwhale Runtime.

    # The name of Starwhale Runtime
    name: demo
    # The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your base image
    docker:
    image: mycustom.com/docker/image:tag
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be installed with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/0.5.12/runtime/yaml/index.html b/0.5.12/runtime/yaml/index.html index c3782d70e..94c59b0fc 100644 --- a/0.5.12/runtime/yaml/index.html +++ b/0.5.12/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    The runtime.yaml Specification

    runtime.yaml is the configuration file that defines the properties of the Starwhale Runtime. runtime.yaml is required for the yaml mode of the swcli runtime build command.

    Examples

    The simplest example

    dependencies:
    - pip:
    - numpy
    name: simple-test

    Define a Starwhale Runtime that uses venv as the Python virtual environment for package isolation, and installs the numpy dependency.

    The llama2 example

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    The full definition example

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be install with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/0.5.12/server/guides/server_admin/index.html b/0.5.12/server/guides/server_admin/index.html index 0d9ee9f2a..e46080f67 100644 --- a/0.5.12/server/guides/server_admin/index.html +++ b/0.5.12/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    Skip to main content
    Version: 0.5.12

    Controller Admin Settings

    Superuser Password Reset

    In case you forget the superuser's password, you could use the SQL below to reset the password to abcd1234

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    After that, you could login to the console and then change the password to what you really want.

    System Settings

    You can customize the system to make it easier to use by leveraging the System Settings. Here is an example below:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    Image Registry

    Tasks dispatched by the server are based on docker images. Pulling these images could be slow if your internet is not working well. Starwhale Server supports the custom image registries, includes dockerSetting.registryForPush and dockerSetting.registryForPull.

    Resource Pool

    The resourcePoolSetting allows you to manage your cluster in a group manner. It is currently implemented by K8S nodeSelector, you could label your machines in K8S cluster and make them a resourcePool in Starwhale.

    Remote Storage

    The storageSetting allows you to manage the storages the server could access.

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # files bigger than this threshold (10MB) will use multipart upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # files bigger than this threshold (10MB) will use multipart upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # files bigger than this threshold (10MB) will use multipart upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload

    Every storageSetting item has a corresponding implementation of the StorageAccessService interface. Starwhale has four built-in implementations:

    • StorageAccessServiceAliyun matches type in (aliyun,oss)
    • StorageAccessServiceMinio matches type in (minio)
    • StorageAccessServiceS3 matches type in (s3)
    • StorageAccessServiceFile matches type in (fs, file)

    Each of the implementations has different requirements for tokens. endpoint is required when type is in (aliyun, minio); region is required when type is s3 and endpoint is empty. The fs/file type requires tokens to contain entries named rootDir and serviceProvider. Please refer to the code for more details.

    - + \ No newline at end of file diff --git a/0.5.12/server/index.html b/0.5.12/server/index.html index 86b89482d..bfa86de2e 100644 --- a/0.5.12/server/index.html +++ b/0.5.12/server/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/server/installation/docker-compose/index.html b/0.5.12/server/installation/docker-compose/index.html index 524c44c9b..8ddf452b3 100644 --- a/0.5.12/server/installation/docker-compose/index.html +++ b/0.5.12/server/installation/docker-compose/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Install Starwhale Server with Docker Compose

    Prerequisites

    Usage

    Start up the server

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    The GLOBAL_IP is the ip for Controller which could be accessed by all swcli both inside docker containers and other user machines.

    compose.yaml contains the Starwhale Controller/MySQL/MinIO services. You can create a compose.override.yaml file which, as its name implies, can contain configuration overrides for compose.yaml. The available configurations are specified here

    - + \ No newline at end of file diff --git a/0.5.12/server/installation/docker/index.html b/0.5.12/server/installation/docker/index.html index f83789a9f..9f5ca4065 100644 --- a/0.5.12/server/installation/docker/index.html +++ b/0.5.12/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Install Starwhale Server with Docker

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage to save datasets, models, and others.

    Please make sure pods on the Kubernetes cluster can access the port exposed by the Starwhale Server installation.

    Prepare an env file for Docker

    Starwhale Server can be configured by environment variables.

    An env file template for Docker is here. You may create your own env file by modifying the template.

    Prepare a kubeconfig file [Optional][SW_SCHEDULER=k8s]

    The kubeconfig file is used for accessing the Kubernetes cluster. For more information about kubeconfig files, see the Official Kubernetes Documentation.

    If you have a local kubectl command-line tool installed, you can run kubectl config view to see your current configuration.

    Run the Docker image

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<path to your kubeconfig file>,destination=/root/.kube/config,readonly \
    --env-file <path to your env file> \
    ghcr.io/star-whale/server:0.5.6

    For users in the mainland of China, use docker image: docker-registry.starwhale.cn/star-whale/server.

    - + \ No newline at end of file diff --git a/0.5.12/server/installation/helm-charts/index.html b/0.5.12/server/installation/helm-charts/index.html index 23fa66489..5391e6773 100644 --- a/0.5.12/server/installation/helm-charts/index.html +++ b/0.5.12/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Install Starwhale Server with Helm

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage system to save datasets, models, and others.
    • Helm 3.2.0+.

    The Starwhale Helm Charts includes MySQL and MinIO as dependencies. If you do not have your own MySQL instance or any S3-compatible object storage available, use the Helm Charts to install. Please check Installation Options to learn how to install Starwhale Server with MySQL and MinIO.

    Create a service account on Kubernetes for Starwhale Server

    If Kubernetes RBAC is enabled (in Kubernetes 1.6+, RBAC is enabled by default), Starwhale Server can not work properly unless it is started by a service account with at least the following permissions:

    Resource | API Group | Get | List | Watch | Create | Delete
    jobs     | batch     | Y   | Y    | Y     | Y      | Y
    pods     | core      | Y   | Y    | Y     |        |
    nodes    | core      | Y   | Y    | Y     |        |
    events   | ""        | Y   | Y    | Y     |        |

    Example:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    Downloading Starwhale Helm Charts

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    Installing Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    If you have a local kubectl command-line tool installed, you can run kubectl get pods -n starwhale to check if all pods are running.

    Updating Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    Uninstalling Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/0.5.12/server/installation/index.html b/0.5.12/server/installation/index.html index e0897949f..cb82fc65c 100644 --- a/0.5.12/server/installation/index.html +++ b/0.5.12/server/installation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.5.12/server/installation/minikube/index.html b/0.5.12/server/installation/minikube/index.html index 71334e96c..9a53c9927 100644 --- a/0.5.12/server/installation/minikube/index.html +++ b/0.5.12/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Install Starwhale Server with Minikube

    Prerequisites

    Starting Minikube

    minikube start --addons ingress --kubernetes-version=1.25.3

    For users in the mainland of China, please add the --image-mirror-country=cn parameter. If there is no kubectl binary on your machine, you may use minikube kubectl, or create an alias with the command alias kubectl="minikube kubectl --".

    Installing Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    For users in the mainland of China, use values.minikube.cn.yaml:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    After the installation is successful, the following prompt message appears:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    Checking Starwhale Server status

    Keep checking the minikube service status until all deployments are running(waiting for 3~5 mins):

    kubectl get deployments -n starwhale
    NAME         READY   UP-TO-DATE   AVAILABLE   AGE
    controller   1/1     1            1           5m
    minio        1/1     1            1           5m
    mysql        1/1     1            1           5m

    Visiting for local

    Make the Starwhale controller accessible locally with the following command:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    Then you can visit http://controller.starwhale.svc in your local web browser.

    Visiting for others

    • Step 1: in the Starwhale Server machine

      for temporary use with socat command:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      When you kill the socat process, the share access will be blocked. iptables maybe a better choice for long-term use.

    • Step 2: in the other machines

      # for macOS or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/0.5.12/server/installation/starwhale_env/index.html b/0.5.12/server/installation/starwhale_env/index.html index efb29e09d..6f23e4389 100644 --- a/0.5.12/server/installation/starwhale_env/index.html +++ b/0.5.12/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Server Environment Example

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################
    - + \ No newline at end of file diff --git a/0.5.12/server/project/index.html b/0.5.12/server/project/index.html index 3ec9a2fb3..c6f661c6b 100644 --- a/0.5.12/server/project/index.html +++ b/0.5.12/server/project/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    Skip to main content
    Version: 0.5.12

    Project Management

    Project type

    There are two types of projects:

    • Public: Visible to anyone. Everyone on the internet can find and see public projects.

    • Private: Visible to users specified in the project member settings. Private projects can only be seen by project owners and project members. The project owner can manage access in the project setting of Manage Member.

    Create a project

    1 Sign in to Starwhale, click Create Project.

    creat

    2 Type a name for the project.

    image

    tip

    Avoid duplicate project names. For more information, see Names in Starwhale

    3 Select project visibility to decide who can find and see the project.

    image

    4 Type a description. It is optional.

    image

    5 To finish, click Submit.

    image

    Edit a project

    The name, privacy and description of a project can be edited.

    tip

    Users with the project owner or maintainer role can edit a project. For more information, see Roles and permissions

    Edit name

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter a new name for the project.

      image

      tip

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

      4 If you're editing multiple projects, repeat steps 1 through 3.

    • If you are on a specific project:

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter a new name for the project.

      image

      tip

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

    Edit privacy

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    Edit description

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    Delete a project

    1 Hover your mouse over the project you want to delete, then click the Delete button.

    image

    2 If you are sure to delete, type the exact name of the project and then click Confirm to delete the project.

    image

    Important: When you delete a project, all the models, datasets, evaluations and runtimes belonging to the project will also be deleted and cannot be restored. Be careful about this action.

    Manage project member

    Only users with the admin role can assign people to the project. The project owner defaults to having the project owner role.

    Add a member to the project

    1 On the project list page or overview tab, click the Manage Member button, then Add Member.

    image

    image

    2 Type the username you want to add to the project, then click a name in the list of matches.

    image

    3 Select a project role for the member from the drop-down menu. For more information, see Roles and permissions

    image

    4 To finish, click Submit.

    image

    Remove a member

    1 On the project list page or project overview tab, click the Manage Member button.

    image

    2 Find the username you want to remove in the search box, click Remove, then Yes.

    image

    - + \ No newline at end of file diff --git a/0.5.12/swcli/config/index.html b/0.5.12/swcli/config/index.html index 0c66b95de..cad4a226c 100644 --- a/0.5.12/swcli/config/index.html +++ b/0.5.12/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Configuration

    Standalone Instance is installed on the user's laptop or development server, providing isolation at the level of Linux/macOS users. Users can install the Starwhale Python package using the pip command and execute any swcli command. After that, they can view their Starwhale configuration in ~/.config/starwhale/config.yaml. In the vast majority of cases, users do not need to manually modify the config.yaml file.

    The ~/.config/starwhale/config.yaml file has permissions set to 0o600 to ensure security, as it contains sensitive information such as encryption keys. Users are advised not to change the file permissions. You can customize your swcli with swcli config edit:

    swcli config edit

    config.yaml example

    The typical config.yaml file is as follows:

    • The default instance is local.
    • cloud-cn/cloud-k8s/pre-k8s are the server/cloud instances, local is the standalone instance.
    • The local storage root directory for the Standalone Instance is set to /home/liutianwei/.starwhale.
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml definition

    Parameter | Description | Type | Default Value | Required
    current_instance | The name of the default instance to use. It is usually set using the swcli instance select command. | String | self | Yes
    instances | Managed instances, including Standalone, Server and Cloud Instances. There must be at least one Standalone Instance named "local" and one or more Server/Cloud Instances. You can log in to a new instance with swcli instance login and log out from an instance with swcli instance logout. | Dict | Standalone Instance named "local" | Yes
    instances.{instance-alias-name}.sw_token | Login token for Server/Cloud Instances. It is only effective for Server/Cloud Instances. Subsequent swcli operations on Server/Cloud Instances will use this token. Note that tokens have an expiration time, typically set to one month, which can be configured within the Server/Cloud Instance. | String | | Cloud - Yes, Standalone - No
    instances.{instance-alias-name}.type | Type of the instance, currently can only be "cloud" or "standalone". | Choice[string] | | Yes
    instances.{instance-alias-name}.uri | For Server/Cloud Instances, the URI is an http/https address. For Standalone Instances, the URI is set to "local". | String | | Yes
    instances.{instance-alias-name}.user_name | User's name | String | | Yes
    instances.{instance-alias-name}.current_project | Default Project under the current instance. It will be used to fill the "project" field in the URI representation by default. You can set it using the swcli project select command. | String | | Yes
    instances.{instance-alias-name}.user_role | User's role. | String | normal | Yes
    instances.{instance-alias-name}.updated_at | The last updated time for this instance configuration. | Time format string | | Yes
    storage | Settings related to local storage. | Dict | | Yes
    storage.root | The root directory for Standalone Instance's local storage. Typically, if there is insufficient space in the home directory and you manually move data files to another location, you can modify this field. | String | ~/.starwhale | Yes
    version | The version of config.yaml, currently only supports 2.0. | String | 2.0 | Yes

    You can put a starwhale.Link into your assets, and the URI in the Link can be whatever you need (only s3-like or http is implemented), such as s3://10.131.0.1:9000/users/path. However, Links may need to be authed; you can configure the auth info in link_auths.

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    Items in link_auths will match the uri in Links automatically. s3 typed link_auth matching Links by looking up bucket and endpoint.

    - + \ No newline at end of file diff --git a/0.5.12/swcli/index.html b/0.5.12/swcli/index.html index 014cc87b5..2b2353df5 100644 --- a/0.5.12/swcli/index.html +++ b/0.5.12/swcli/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Client (swcli) User Guide

    The Starwhale Client (swcli) is a command-line tool that enables you to interact with Starwhale instances. You can use swcli to complete almost all tasks in Starwhale. swcli is written in pure python3 (requires Python 3.7 ~ 3.11) so that it can be easily installed by the pip command. Currently, swcli only supports Linux and macOS; Windows support is coming soon.

    - + \ No newline at end of file diff --git a/0.5.12/swcli/installation/index.html b/0.5.12/swcli/installation/index.html index 68887df33..846569a7e 100644 --- a/0.5.12/swcli/installation/index.html +++ b/0.5.12/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Installation Guide

    We can use swcli to complete all tasks for Starwhale Instances. swcli is written in pure python3, which can be installed easily by the pip command. Here are some installation tips that can help you get a cleaner, unambiguous swcli python environment with no dependency conflicts.

    Installing Advice

    DO NOT install Starwhale in your system's global Python environment. It will cause a python dependency conflict problem.

    Prerequisites

    • Python 3.7 ~ 3.11
    • Linux or macOS
    • Conda (optional)

    In the Ubuntu system, you can run the following commands:

    sudo apt-get install python3 python3-venv python3-pip

    #If you want to install multi python versions
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli works on macOS. If you run into issues with the default system Python3 on macOS, try installing Python3 through the homebrew:

    brew install python3

    Install swcli

    Install with venv

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    Install with conda

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    👏 Now, you can use swcli in the global environment.

    Install for the special scenarios

    # for Audio processing
    python -m pip install starwhale[audio]

    # for Image processing
    python -m pip install starwhale[pillow]

    # for swcli model server command
    python -m pip install starwhale[server]

    # for built-in online serving
    python -m pip install starwhale[online-serve]

    # install all dependencies
    python -m pip install starwhale[all]

    Update swcli

    #for venv
    python3 -m pip install --upgrade starwhale

    #for conda
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    Uninstall swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/0.5.12/swcli/swignore/index.html b/0.5.12/swcli/swignore/index.html index 8c858265c..635f93097 100644 --- a/0.5.12/swcli/swignore/index.html +++ b/0.5.12/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    About the .swignore file

    The .swignore file is similar to .gitignore, .dockerignore, and other files used to define ignored files or dirs. The .swignore file is mainly used in the Starwhale Model building process. By default, the swcli model build command or starwhale.model.build() Python SDK will traverse all files in the specified directory and automatically exclude certain known files or directories that are not suitable for inclusion in the model package.

    PATTERN FORMAT

    • Each line in a swignore file specifies a pattern, which matches files and directories.
    • A blank line matches no files, so it can serve as a separator for readability.
    • An asterisk * matches anything except a slash.
    • A line starting with # serves as a comment.
    • Supports wildcard expressions, for example: *.jpg, *.png.

    Auto Ignored files or dirs

    If you want to include the auto ignored files or dirs, you can add --add-all for the swcli model build command.

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv installation dir
    • conda installation dir

    Example

    Here is the .swignore file used in the MNIST example:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/0.5.12/swcli/uri/index.html b/0.5.12/swcli/uri/index.html index 0fb3162db..4ca9d46c0 100644 --- a/0.5.12/swcli/uri/index.html +++ b/0.5.12/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.5.12

    Starwhale Resources URI

    tip

    Resource URI is widely used in Starwhale client commands. The URI can refer to a resource in the local instance or any other resource in a remote instance. In this way, the Starwhale client can easily manipulate any resource.

    concepts-org.jpg

    Instance URI

    Instance URI can be either:

    • local: standalone instance.
    • [http(s)://]<hostname or ip>[:<port>]: cloud instance with HTTP address.
    • [cloud://]<cloud alias>: cloud or server instance with an alias name, which can be configured in the instance login phase.
    caution

    "local" is different from "localhost". The former means the local standalone instance without a controller, while the latter implies a controller listening at the default port 8082 on the localhost.

    Example:

    # log in Starwhale Cloud; the alias is swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.ai --alias swcloud

    # copy a model from the local instance to the cloud instance
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # copy a runtime to a Starwhale Server instance: http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    Project URI

    Project URI is in the format [<Instance URI>/project/]<project name>. If the instance URI is not specified, use the current instance instead.

    Example:

    swcli project select self   # select the self project in the current instance
    swcli project info local/project/self # inspect self project info in the local instance

    Model/Dataset/Runtime URI

    • Model URI: [<Project URI>/model/]<model name>[/version/<version id|tag>].
    • Dataset URI: [<Project URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • Runtime URI: [<Project URI>/runtime/]<runtime name>[/version/<version id|tag>].
    tip
    • swcli supports human-friendly short version id. You can type the first few characters of the version id, provided it is at least four characters long and unambiguous. However, the recover command must use the complete version id.
    • If the project URI is not specified, the default project will be used.
    • You can always use the version tag instead of the version id.

    Example:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # inspect model info, model name: mnist, version:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # short version
    swcli model info mnist # inspect mnist model info
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # use the default latest tag

    Job URI

    • format: [<Project URI>/job/]<job id>.
    • If the project URI is not specified, the default project will be used.

    Example:

    swcli job info mezdayjzge3w   # Inspect mezdayjzge3w version in default instance and default project
    swcli job info local/project/self/job/mezday # Inspect the local instance, self project, with short job id:mezday

    The default instance

    When the instance part of a project URI is omitted, the default instance is used instead. The default instance is the one selected by the swcli instance login or swcli instance use command.

    The default project

    When the project parts of Model/Dataset/Runtime/Evaluation URIs are omitted, the default project is used instead. The default project is the one selected by the swcli project use command.

    - + \ No newline at end of file diff --git a/0.6.0/cloud/billing/bills/index.html b/0.6.0/cloud/billing/bills/index.html index edb6c8593..d3b139558 100644 --- a/0.6.0/cloud/billing/bills/index.html +++ b/0.6.0/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/cloud/billing/index.html b/0.6.0/cloud/billing/index.html index a26db04df..75b20940b 100644 --- a/0.6.0/cloud/billing/index.html +++ b/0.6.0/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/cloud/billing/recharge/index.html b/0.6.0/cloud/billing/recharge/index.html index 6ffac20c7..c796f8ca6 100644 --- a/0.6.0/cloud/billing/recharge/index.html +++ b/0.6.0/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/cloud/billing/refund/index.html b/0.6.0/cloud/billing/refund/index.html index e2701494a..07fbadf60 100644 --- a/0.6.0/cloud/billing/refund/index.html +++ b/0.6.0/cloud/billing/refund/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/cloud/billing/voucher/index.html b/0.6.0/cloud/billing/voucher/index.html index b38a754c4..ffbefbe88 100644 --- a/0.6.0/cloud/billing/voucher/index.html +++ b/0.6.0/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/cloud/index.html b/0.6.0/cloud/index.html index c178f39b3..31a9eaf77 100644 --- a/0.6.0/cloud/index.html +++ b/0.6.0/cloud/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Cloud User Guide

    Starwhale Cloud is a service hosted on public cloud and operated by the Starwhale team. The access url is https://cloud.starwhale.cn.

    - + \ No newline at end of file diff --git a/0.6.0/community/contribute/index.html b/0.6.0/community/contribute/index.html index 97717c884..786dcd9ff 100644 --- a/0.6.0/community/contribute/index.html +++ b/0.6.0/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Contribute to Starwhale

    Getting Involved/Contributing

    We welcome and encourage all contributions to Starwhale, including and not limited to:

    • Describe the problems encountered during use.
    • Submit feature request.
    • Discuss in Slack and Github Issues.
    • Code Review.
    • Improve docs, tutorials and examples.
    • Fix Bug.
    • Add Test Case.
    • Code readability and code comments to improve readability.
    • Develop new features.
    • Write enhancement proposal.

    You can get involved, get updates and contact Starwhale developers in the following ways:

    Starwhale Resources

    Code Structure

    • client: swcli and Python SDK with Pure Python3, which includes all Standalone Instance features.
      • api: Python SDK.
      • cli: Command Line Interface entrypoint.
      • base: Python base abstract.
      • core: Starwhale core concepts which includes Dataset,Model,Runtime,Project, job and Evaluation, etc.
      • utils: Python utilities lib.
    • console: frontend with React + TypeScript.
    • server: Starwhale Controller with Java, which includes all Starwhale Cloud Instance backend apis.
    • docker: Helm Charts, dockerfile.
    • docs: Starwhale official documentation.
    • example: Example code.
    • scripts: Bash and Python scripts for E2E testing and software releases, etc.

    Fork and clone the repository

    You will need to fork the code of Starwhale repository and clone it to your local machine.

    • Fork the Starwhale repository: Fork Starwhale Github Repo. For more usage details, please refer to: Fork a repo

    • Install Git-LFS:Git-LFS

       git lfs install
    • Clone code to local machine

      git clone https://github.com/${your username}/starwhale.git

    Development environment for Standalone Instance

    Standalone Instance is written in Python3. When you want to modify swcli and sdk, you need to build the development environment.

    Standalone development environment prerequisites

    • OS: Linux or macOS
    • Python: 3.7~3.11
    • Docker: >=19.03(optional)
    • Python isolated env tools: Python venv, virtualenv or conda, etc.

    Building from source code

    Based on the previous step, clone to the local directory: starwhale, and enter the client subdirectory:

    cd starwhale/client

    Create an isolated python environment with conda:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    Install client package and python dependencies into the starwhale-dev environment:

    make install-sw
    make install-dev-req

    Validate with the swcli --version command. In the development environment, the version is 0.0.0.dev0:

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ swcli --version
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    Modifying the code

    When you modify the code, you do not need to install the python package (run the make install-sw command) again. .editorconfig will be picked up by most IDEs and code editors, which helps maintain consistent coding styles for multiple developers.

    Lint and Test

    Run unit test, E2E test, mypy lint, flake lint and isort check in the starwhale directory.

    make client-all-check

    Development environment for Cloud Instance

    Cloud Instance is written in Java(backend) and React+TypeScript(frontend).

    Development environment for Console

    Development environment for Server

    • Language: Java
    • Build tool: Maven
    • Development framework: Spring Boot+Mybatis
    • Unit test framework:Junit5
      • Mockito used for mocking
      • Hamcrest used for assertion
      • Testcontainers used for providing lightweight, throwaway instances of common databases, Selenium web browsers that can run in a Docker container.
    • Check style tool:use maven-checkstyle-plugin

    Server development environment prerequisites

    • OS: Linux, macOS or Windows
    • Docker: >=19.03
    • JDK: >=11
    • Maven: >=3.8.1
    • Mysql: >=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(If you don't have a k8s cluster, you can use Minikube as an alternative for development and debugging)

    Modify the code and add unit tests

    Now you can enter the corresponding module to modify and adjust the code on the server side. The main business code directory is src/main/java, and the unit test directory is src/test/java.

    Execute code check and run unit tests

    cd starwhale/server
    mvn clean test

    Deploy the server at local machine

    • Dependent services that need to be deployed

      • Minikube (Optional. Minikube can be used when there is no k8s cluster; see the installation doc: Minikube)

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • Package server program

      If you need to deploy the front-end at the same time when deploying the server, you can execute the build command of the front-end part first, and then execute 'mvn clean package', and the compiled front-end files will be automatically packaged.

      Use the following command to package the program

        cd starwhale/server
      mvn clean package
    • Specify the environment required for server startup

      # Minio env
      export SW_STORAGE_ENDPOINT=http://${Minio IP,default is:127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,default is:starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,default is:starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,default is:starwhale}
      export SW_STORAGE_REGION=${Minio region,default is:local}
      # kubernetes env
      export KUBECONFIG=${the '.kube' file path}\.kube\config

      export SW_INSTANCE_URI=http://${Server IP}:8082
      export SW_METADATA_STORAGE_IP=${Mysql IP,default: 127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,default: 3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,default: starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,default: starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,default: starwhale}
    • Deploy server service

      You can use the IDE or the command to deploy.

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • Debug

      there are two ways to debug the modified function:

      • Use swagger-ui for interface debugging, visit /swagger-ui/index.html to find the corresponding api
      • Debug the corresponding function directly in the ui (provided that the front-end code has been built in advance according to the instructions when packaging)
    - + \ No newline at end of file diff --git a/0.6.0/concepts/index.html b/0.6.0/concepts/index.html index cd14ad43c..8cb77eb59 100644 --- a/0.6.0/concepts/index.html +++ b/0.6.0/concepts/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/concepts/names/index.html b/0.6.0/concepts/names/index.html index 2cbffb9b3..c91f15788 100644 --- a/0.6.0/concepts/names/index.html +++ b/0.6.0/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Names in Starwhale

    Names mean project names, model names, dataset names, runtime names, and tag names.

    Names Limitation

    • Names are case-insensitive.
    • A name MUST only consist of letters A-Z a-z, digits 0-9, the hyphen character -, the dot character ., and the underscore character _.
    • A name should always start with a letter or the _ character.
    • The maximum length of a name is 80.

    Names uniqueness requirement

    • The resource name should be a unique string within its owner. For example, the project name should be unique in the owner instance, and the model name should be unique in the owner project.
    • The resource name can not be used by any other resource of the same kind in their owner, including those removed ones. For example, Project "apple" can not have two models named "Alice", even if one of them is already removed.
    • Different kinds of resources can have the same name. For example, a project and a model can be called "Alice" simultaneously.
    • Resources with different owners can have the same name. For example, a model in project "Apple" and a model in project "Banana" can have the same name "Alice".
    • Garbage-collected resources' names can be reused. For example, after the model with the name "Alice" in project "Apple" is removed and garbage collected, the project can have a new model with the same name "Alice".
    - + \ No newline at end of file diff --git a/0.6.0/concepts/project/index.html b/0.6.0/concepts/project/index.html index 9b6b026fd..50efd02ac 100644 --- a/0.6.0/concepts/project/index.html +++ b/0.6.0/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Project in Starwhale

    "Project" is the basic unit for organizing different resources like models, datasets, etc. You may use projects for different purposes. For example, you can create a project for a data scientist team, a product line, or a specific model. Users usually work on one or more projects in their daily lives.

    Starwhale Server/Cloud projects are grouped by accounts. Starwhale Standalone does not have accounts. So you will not see any account name prefix in Starwhale Standalone projects. Starwhale Server/Cloud projects can be either "public" or "private". A public project means all users on the same instance are assigned a "guest" role to the project by default. For more information about roles, see Roles and permissions in Starwhale.

    A self project is created automatically and configured as the default project in Starwhale Standalone.

    - + \ No newline at end of file diff --git a/0.6.0/concepts/roles-permissions/index.html b/0.6.0/concepts/roles-permissions/index.html index de4c96234..1f7263af9 100644 --- a/0.6.0/concepts/roles-permissions/index.html +++ b/0.6.0/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Roles and permissions in Starwhale

    Roles are used to assign permissions to users. Only Starwhale Server/Cloud has roles and permissions; Starwhale Standalone does not. The Administrator role is automatically created and assigned to the user "admin". Some sensitive operations can only be performed by users with the Administrator role, for example, creating accounts in Starwhale Server.

    Projects have three roles:

    • Admin - Project administrators can read and write project data and assign project roles to users.
    • Maintainer - Project maintainers can read and write project data.
    • Guest - Project guests can only read project data.
    ActionAdminMaintainerGuest
    Manage project membersYes
    Edit projectYesYes
    View projectYesYesYes
    Create evaluationsYesYes
    Remove evaluationsYesYes
    View evaluationsYesYesYes
    Create datasetsYesYes
    Update datasetsYesYes
    Remove datasetsYesYes
    View datasetsYesYesYes
    Create modelsYesYes
    Update modelsYesYes
    Remove modelsYesYes
    View modelsYesYesYes
    Create runtimesYesYes
    Update runtimesYesYes
    Remove runtimesYesYes
    View runtimesYesYesYes

    The user who creates a project becomes the first project administrator. They can assign roles to other users later.

    - + \ No newline at end of file diff --git a/0.6.0/concepts/versioning/index.html b/0.6.0/concepts/versioning/index.html index 441686062..c4e2e34ed 100644 --- a/0.6.0/concepts/versioning/index.html +++ b/0.6.0/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Resource versioning in Starwhale

    • Starwhale manages the history of all models, datasets, and runtimes. Every update to a specific resource appends a new version of the history.
    • Versions are identified by a version id which is a random string generated automatically by Starwhale and are ordered by their creation time.
    • Versions can have tags. Starwhale uses version tags to provide a human-friendly representation of versions. By default, Starwhale attaches a default tag to each version. The default tag is the letter "v", followed by a number. For each versioned resource, the first version tag is always tagged with "v0", the second version is tagged with "v1", and so on. And there is a special tag "latest" that always points to the last version. When a version is removed, its default tag will not be reused. For example, there is a model with tags "v0, v1, v2". When "v2" is removed, tags will be "v0, v1". And the following tag will be "v3" instead of "v2" again. You can attach your own tags to any version and remove them at any time.
    • Starwhale uses a linear history model. There is neither branch nor cycle in history.
    • History can not be rollback. When a version is to be reverted, Starwhale clones the version and appends it as a new version to the end of the history. Versions in history can be manually removed and recovered.
    - + \ No newline at end of file diff --git a/0.6.0/dataset/index.html b/0.6.0/dataset/index.html index b8cde9793..2379647fa 100644 --- a/0.6.0/dataset/index.html +++ b/0.6.0/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Dataset User Guide

    overview

    Design Overview

    Starwhale Dataset Positioning

    The Starwhale Dataset contains three core stages: data construction, data loading, and data visualization. It is a data management tool for the ML/DL field. Starwhale Dataset can directly use the environment built by Starwhale Runtime, and can be seamlessly integrated with Starwhale Model and Starwhale Evaluation. It is an important part of the Starwhale MLOps toolchain.

    According to the classification of MLOps Roles in Machine Learning Operations (MLOps): Overview, Definition, and Architecture, the three stages of Starwhale Dataset target the following user groups:

    • Data construction: Data Engineer, Data Scientist
    • Data loading: Data Scientist, ML Developer
    • Data visualization: Data Engineer, Data Scientist, ML Developer

    mlops-users

    Core Functions

    • Efficient loading: The original dataset files are stored in external storage such as OSS or NAS, and are loaded on demand without having to save to disk.
    • Simple construction: Supports one-click dataset construction from Image/Video/Audio directories, json files and Huggingface datasets, and also supports writing Python code to build completely custom datasets.
    • Versioning: Can perform version tracking, data append and other operations, and avoid duplicate data storage through the internally abstracted ObjectStore.
    • Sharing: Implement bidirectional dataset sharing between Standalone instances and Cloud/Server instances through the swcli dataset copy command.
    • Visualization: The web interface of Cloud/Server instances can present multi-dimensional, multi-type data visualization of datasets.
    • Artifact storage: The Standalone instance can store locally built or distributed swds series files, while the Cloud/Server instance uses object storage to provide centralized swds artifact storage.
    • Seamless Starwhale integration: Starwhale Dataset can use the runtime environment built by Starwhale Runtime to build datasets. Starwhale Evaluation and Starwhale Model can directly specify the dataset through the --dataset parameter to complete automatic data loading, which facilitates inference, model evaluation and other environments.

    Key Elements

    • swds virtual package file: swds is different from swmp and swrt. It is not a single packaged file, but a virtual concept that specifically refers to a directory that contains dataset-related files for a version of the Starwhale dataset, including _manifest.yaml, dataset.yaml, dataset build Python scripts, and data file links, etc. You can use the swcli dataset info command to view where the swds is located. swds is the abbreviation of Starwhale Dataset.

    swds-tree.png

    • swcli dataset command line: A set of dataset-related commands, including construction, distribution and management functions. See CLI Reference for details.
    • dataset.yaml configuration file: Describes the dataset construction process. It can be completely omitted and specified through swcli dataset build parameters. dataset.yaml can be considered as a configuration file representation of the swcli dataset build command line parameters. swcli dataset build parameters take precedence over dataset.yaml.
    • Dataset Python SDK: Includes data construction, data loading, and several predefined data types. See Python SDK for details.
    • Python scripts for dataset construction: A series of scripts written using the Starwhale Python SDK to build datasets.

    Best Practices

    The construction of Starwhale Dataset is performed independently. If third-party libraries need to be introduced when writing construction scripts, using Starwhale Runtime can simplify Python dependency management and ensure reproducible dataset construction. The Starwhale platform will build in as many open source datasets as possible for users to copy datasets for immediate use.

    Command Line Grouping

    The Starwhale Dataset command line can be divided into the following stages from the perspective of usage phases:

    • Construction phase
      • swcli dataset build
    • Visualization phase
      • swcli dataset diff
      • swcli dataset head
    • Distribution phase
      • swcli dataset copy
    • Basic management
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    Currently, the Web UI in the Cloud/Server instance can visually display the dataset. Only DataTypes using the Python SDK can be correctly interpreted by the frontend, with mappings as follows:

    • Image: Display thumbnails, enlarged images, MASK type images, support image/png, image/jpeg, image/webp, image/svg+xml, image/gif, image/apng, image/avif formats.
    • Audio: Displayed as an audio wave graph, playable, supports audio/mp3 and audio/wav formats.
    • Video: Displayed as a video, playable, supports video/mp4, video/avi and video/webm formats.
    • GrayscaleImage: Display grayscale images, support x/grayscale format.
    • Text: Display text, support text/plain format, set encoding format, default is utf-8.
    • Binary and Bytes: Not supported for display currently.
    • Link: The above multimedia types all support specifying links as storage paths.

    Starwhale Dataset Data Format

    The dataset consists of multiple rows, each row being a sample, each sample containing several features. The features have a dict-like structure with some simple restrictions [L]:

    • The dict keys must be str type.
    • The dict values must be Python basic types like int/float/bool/str/bytes/dict/list/tuple, or Starwhale built-in data types.
    • For the same key across different samples, the value types do not need to stay the same.
    • If the value is a list or tuple, the element data types must be consistent.
    • For dict values, the restrictions are the same as [L].

    Example:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    File Data Handling

    Starwhale Dataset handles file type data in a special way. You can ignore this section if you don't care about Starwhale's implementation.

    According to actual usage scenarios, Starwhale Dataset has two ways of handling file class data that is based on the base class starwhale.BaseArtifact:

    • swds-bin: Starwhale merges the data into several large files in its own binary format (swds-bin), which can efficiently perform indexing, slicing and loading.
    • remote-link: If the user's original data is stored in some external storage such as OSS or NAS, with a lot of original data that is inconvenient to move or has already been encapsulated by some internal dataset implementation, then you only need to use links in the data to establish indexes.

    In the same Starwhale dataset, two types of data can be included simultaneously.

    - + \ No newline at end of file diff --git a/0.6.0/dataset/yaml/index.html b/0.6.0/dataset/yaml/index.html index 8c570fa57..b6f084b7b 100644 --- a/0.6.0/dataset/yaml/index.html +++ b/0.6.0/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    The dataset.yaml Specification

    tip

    dataset.yaml is optional for the swcli dataset build command.

    Building Starwhale Dataset uses dataset.yaml. Omitting dataset.yaml allows describing related configurations in swcli dataset build command line parameters. dataset.yaml can be considered as a file-based representation of the build command line configuration.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale DatasetYesString
    handlerImportable address of a class that inherits starwhale.SWDSBinBuildExecutor, starwhale.UserRawBuildExecutor or starwhale.BuildExecutor, or a function that returns a Generator or iterable object. Format is {module path}:{class name\|function name}YesString
    descDataset descriptionNoString""
    versiondataset.yaml format version, currently only "1.0" is supportedNoString1.0
    attrDataset build parametersNoDict
    attr.volume_sizeSize of each data file in the swds-bin dataset. Can be a number in bytes, or a number plus unit like 64M, 1GB etc.NoInt or Str64MB
    attr.alignment_sizeData alignment size of each data block in the swds-bin dataset. If set to 4k, and a data block is 7.9K, 0.1K padding will be added to make the block size a multiple of alignment_size, improving page size and read efficiency.NoInteger or String128

    Examples

    Simplest Example

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    The helloworld dataset uses the ExampleProcessExecutor class in dataset.py of the dataset.yaml directory to build data.

    MNIST Dataset Build Example

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor
    desc: MNIST data and label test dataset
    attr:
    alignment_size: 128
    volume_size: 4M

    Example with handler as a generator function

    dataset.yaml contents:

    name: helloworld
    handler: dataset:iter_item

    dataset.py contents:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}
    - + \ No newline at end of file diff --git a/0.6.0/evaluation/heterogeneous/node-able/index.html b/0.6.0/evaluation/heterogeneous/node-able/index.html index b706d767f..f2091baff 100644 --- a/0.6.0/evaluation/heterogeneous/node-able/index.html +++ b/0.6.0/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -23,7 +23,7 @@ Refer to the link.

    Take v0.13.0-rc.1 as an example:

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    Note: This operation will run the NVIDIA device plugin on all Kubernetes nodes. If configured before, it will be updated. Please evaluate the image version used carefully.

  • Confirm GPU can be discovered and used in the cluster. Refer to the command below. Check that nvidia.com/gpu is in the Capacity of the Jetson node. The GPU is then recognized normally by the Kubernetes cluster.

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • Build and Use Custom Images

    The l4t-jetpack image mentioned earlier can meet our general use. If we need to customize a more streamlined image or one with more features, we can make it based on l4t-base. Relevant Dockerfiles can refer to the image Starwhale made for mnist.

    - + \ No newline at end of file diff --git a/0.6.0/evaluation/heterogeneous/virtual-node/index.html b/0.6.0/evaluation/heterogeneous/virtual-node/index.html index f2285bf69..a227afa6a 100644 --- a/0.6.0/evaluation/heterogeneous/virtual-node/index.html +++ b/0.6.0/evaluation/heterogeneous/virtual-node/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Virtual Kubelet as Kubernetes nodes

    Introduction

    Virtual Kubelet is an open source framework that can simulate a K8s node by mimicking the communication between kubelet and the K8s cluster.

    This solution is widely used by major cloud vendors for serverless container cluster solutions, such as Alibaba Cloud's ASK, Amazon's AWS Fargate, etc.

    Principles

    The virtual kubelet framework implements the related interfaces of kubelet for Node. With simple configuration, it can simulate a node.

    We only need to implement the PodLifecycleHandler interface to support:

    • Create, update, delete Pod
    • Get Pod status
    • Get Container logs

    Adding Devices to the Cluster

    If our device cannot serve as a K8s node due to resource constraints or other situations, we can manage these devices by using virtual kubelet to simulate a proxy node.

    The control flow between Starwhale Controller and the device is as follows:


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    Virtual kubelet converts the Pod orchestration information sent by Starwhale Controller into control behaviors for the device, such as executing a command via ssh on the device, or sending a message via USB or serial port.

    Below is an example of using virtual kubelet to control a device not joined to the cluster that is SSH-enabled:

    1. Prepare certificates
    • Create file csr.conf with the following content:
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name

    [req_distinguished_name]

    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names

    [alt_names]
    IP = 1.2.3.4
    • Generate the certificate:
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • Submit the certificate:
    cat vklet.csr| base64 | tr -d "\n" # output as content of spec.request in csr.yaml

    csr.yaml:

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
    kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    Now we have vklet-cert.pem.

    • Compile virtual kubelet:
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    Create the node configuration file mock.json:

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    Start virtual kubelet:

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig
    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    Now we have simulated a node with 100 cores + 100GB memory using virtual kubelet.

    • Add a PodLifecycleHandler implementation to convert important information in the Pod orchestration into ssh command execution, and gather logs for the Starwhale Controller to collect.

    See ssh executor for a concrete implementation.

    - + \ No newline at end of file diff --git a/0.6.0/evaluation/index.html b/0.6.0/evaluation/index.html index f4ceca99e..47a28cb5c 100644 --- a/0.6.0/evaluation/index.html +++ b/0.6.0/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Model Evaluation

    Design Overview

    Starwhale Evaluation Positioning

    The goal of Starwhale Evaluation is to provide end-to-end management for model evaluation, including creating Jobs, distributing Tasks, viewing model evaluation reports and basic management. Starwhale Evaluation is a specific application of Starwhale Model, Starwhale Dataset, and Starwhale Runtime in the model evaluation scenario. Starwhale Evaluation is part of the MLOps toolchain built by Starwhale. More applications like Starwhale Model Serving, Starwhale Training will be included in the future.

    Core Features

    • Visualization: Both swcli and the Web UI provide visualization of model evaluation results, supporting comparison of multiple results. Users can also customize logging of intermediate processes.

    • Multi-scenario Adaptation: Whether it's a notebook, desktop or distributed cluster environment, the same commands, Python scripts, artifacts and operations can be used for model evaluation. This satisfies different computational power and data volume requirements.

    • Seamless Starwhale Integration: Leverage Starwhale Runtime for the runtime environment, Starwhale Dataset as data input, and run models from Starwhale Model. Configuration is simple whether using swcli, Python SDK or Cloud/Server instance Web UI.

    Key Elements

    • swcli model run: Command line for bulk offline model evaluation.
    • swcli model serve: Command line for online model evaluation.

    Best Practices

    Command Line Grouping

    From the perspective of completing an end-to-end Starwhale Evaluation workflow, commands can be grouped as:

    • Preparation Stage
      • swcli dataset build or Starwhale Dataset Python SDK
      • swcli model build or Starwhale Model Python SDK
      • swcli runtime build
    • Evaluation Stage
      • swcli model run
      • swcli model serve
    • Results Stage
      • swcli job info
    • Basic Management
      • swcli job list
      • swcli job remove
      • swcli job recover

    Abstraction job-step-task

    • job: A model evaluation task is a job, which contains one or more steps.

    • step: A step corresponds to a stage in the evaluation process. With the default PipelineHandler, steps are predict and evaluate. For custom evaluation processes using @handler, @evaluation.predict, @evaluation.evaluate decorators, steps are the decorated functions. Steps can have dependencies, forming a DAG. A step contains one or more tasks. Tasks in the same step have the same logic but different inputs. A common approach is to split the dataset into multiple parts, with each part passed to a task. Tasks can run in parallel.

    • task: A task is the final running entity. In Cloud/Server instances, a task is a container in a Pod. In Standalone instances, a task is a Python Thread.

    The job-step-task abstraction is the basis for implementing distributed runs in Starwhale Evaluation.

    - + \ No newline at end of file diff --git a/0.6.0/faq/index.html b/0.6.0/faq/index.html index a682c6db6..4522ca4f5 100644 --- a/0.6.0/faq/index.html +++ b/0.6.0/faq/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/getting-started/cloud/index.html b/0.6.0/getting-started/cloud/index.html index b7c073232..6d4ecdaaf 100644 --- a/0.6.0/getting-started/cloud/index.html +++ b/0.6.0/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Getting started with Starwhale Cloud

    Starwhale Cloud is hosted on Aliyun with the domain name https://cloud.starwhale.cn. In the future, we will launch the service on AWS with the domain name https://cloud.starwhale.ai. It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.

    You need to install the Starwhale Client (swcli) at first.

    Sign Up for Starwhale Cloud and create your first project

    You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account.

    Then you can create a new project. In this tutorial, we will use the name demo for the project name.

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Login to the cloud instance

    swcli instance login --username <your account name> --password <your password> --alias swcloud https://cloud.starwhale.cn

    Copy the dataset, model, and runtime to the cloud instance

    swcli model copy mnist swcloud/project/<your account name>:demo
    swcli dataset copy mnist swcloud/project/<your account name>:demo
    swcli runtime copy pytorch swcloud/project/<your account name>:demo

    Run an evaluation with the web UI

    console-create-job.gif

    Congratulations! You have completed the Starwhale Cloud Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.6.0/getting-started/index.html b/0.6.0/getting-started/index.html index 1175b11d3..88d427359 100644 --- a/0.6.0/getting-started/index.html +++ b/0.6.0/getting-started/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Getting started

    First, you need to install the Starwhale Client (swcli), which can be done by running the following command:

    python3 -m pip install starwhale

    For more information, see the swcli installation guide.

    Depending on your instance type, there are three getting-started guides available for you:

    • Getting started with Starwhale Standalone - This guide helps you run an MNIST evaluation on your desktop PC/laptop. It is the fastest and simplest way to get started with Starwhale.
    • Getting started with Starwhale Server - This guide helps you install Starwhale Server in your private data center and run an MNIST evaluation. At the end of the tutorial, you will have a Starwhale Server instance where you can run model evaluations on and manage your datasets and models.
    • Getting started with Starwhale Cloud - This guide helps you create an account on Starwhale Cloud and run an MNIST evaluation. It is the easiest way to experience all Starwhale features.
    - + \ No newline at end of file diff --git a/0.6.0/getting-started/runtime/index.html b/0.6.0/getting-started/runtime/index.html index 08b591796..5022042cf 100644 --- a/0.6.0/getting-started/runtime/index.html +++ b/0.6.0/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Getting Started with Starwhale Runtime

    This article demonstrates how to build a Starwhale Runtime of the Pytorch environment and how to use it. This runtime can meet the dependency requirements of the six examples in Starwhale: mnist, speech commands, nmt, cifar10, ag_news, and PennFudan. Links to relevant code: example/runtime/pytorch.

    You can learn the following things from this tutorial:

    • How to build a Starwhale Runtime.
    • How to use a Starwhale Runtime in different scenarios.
    • How to release a Starwhale Runtime.

    Prerequisites

    Run the following command to clone the example code:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch # for users in the mainland of China, use pytorch-cn-mirror instead.

    Build Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    Use Starwhale Runtime in the standalone instance

    Use Starwhale Runtime in the shell

    # Activate the runtime
    swcli runtime activate pytorch

    swcli runtime activate will download all python dependencies of the runtime, which may take a long time.

    All dependencies are ready in your python environment when the runtime is activated. It is similar to source venv/bin/activate of virtualenv or the conda activate command of conda. If you close the shell or switch to another shell, you need to reactivate the runtime.

    Use Starwhale Runtime in swcli

    # Use the runtime when building a Starwhale Model
    swcli model build . --runtime pytorch
    # Use the runtime when building a Starwhale Dataset
    swcli dataset build --yaml /path/to/dataset.yaml --runtime pytorch
    # Run a model evaluation with the runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch

    Copy Starwhale Runtime to another instance

    You can copy the runtime to a server/cloud instance, which can then be used in the server/cloud instance or downloaded by other users.

    # Copy the runtime to a server instance named 'pre-k8s'
    ❯ swcli runtime copy pytorch cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/0.6.0/getting-started/server/index.html b/0.6.0/getting-started/server/index.html index 21a04241c..dd9f4861d 100644 --- a/0.6.0/getting-started/server/index.html +++ b/0.6.0/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Getting started with Starwhale Server

    Install Starwhale Server

    To install Starwhale Server, see the installation guide.

    Create your first project

    Login to the server

    Open your browser and enter your server's URL in the address bar. Login with your username(starwhale) and password(abcd1234).

    console-artifacts.gif

    Create a new project

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Copy the dataset, the model, and the runtime to the server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    Use the Web UI to run an evaluation

    Navigate to the "demo" project in your browser and create a new evaluation job.

    console-create-job.gif

    Congratulations! You have completed the Starwhale Server Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.6.0/getting-started/standalone/index.html b/0.6.0/getting-started/standalone/index.html index 7b9d7b9cd..c51de761b 100644 --- a/0.6.0/getting-started/standalone/index.html +++ b/0.6.0/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Getting started with Starwhale Standalone

    When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.

    We also provide a Jupyter Notebook example, you can try it in Google Colab or in your local vscode/jupyterlab.

    Downloading Examples

    Download Starwhale examples by cloning the Starwhale project via:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    To save time in the example downloading, we skip git-lfs and other commit info. We will use the ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory.

    Core Workflow

    Building a Pytorch Runtime

    Runtime example codes are in the example/runtime/pytorch directory.

    • Build the Starwhale runtime bundle:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      tip

      When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. Using the befitting pypi mirror and cache config in the ~/.pip/pip.conf file is a recommended practice.

      For users in the mainland of China, the following conf file is an option:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • Check your local Starwhale Runtime:

      swcli runtime list
      swcli runtime info pytorch

    Building a Model

    Model example codes are in the example/mnist directory.

    • Download the pre-trained model file:

      cd example/mnist
      make download-model
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-model
      cd -
    • Build a Starwhale model:

      swcli model build example/mnist --runtime pytorch
    • Check your local Starwhale models:

      swcli model list
      swcli model info mnist

    Building a Dataset

    Dataset example codes are in the example/mnist directory.

    • Download the MNIST raw data:

      cd example/mnist
      make download-data
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-data
      cd -
    • Build a Starwhale dataset:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • Check your local Starwhale dataset:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    Running an Evaluation Job

    • Create an evaluation job:

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • Check the evaluation result

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    Congratulations! You have completed the Starwhale Standalone Getting Started Guide.

    - + \ No newline at end of file diff --git a/0.6.0/index.html b/0.6.0/index.html index 12b6f4712..0adac8a83 100644 --- a/0.6.0/index.html +++ b/0.6.0/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    What is Starwhale

    Overview

    Starwhale is an MLOps/LLMOps platform that makes your model creation, evaluation and publication much easier. It aims to create a handy tool for data scientists and machine learning engineers.

    Starwhale helps you:

    • Keep track of your training/testing dataset history including data items and their labels, so that you can easily access them.
    • Manage your model packages that you can share across your team.
    • Run your models in different environments, either on a Nvidia GPU server or on an embedded device like Cherry Pi.
    • Create an online service with an interactive Web UI for your models.

    Starwhale is designed to be an open platform. You can create your own plugins to meet your requirements.

    Deployment options

    Each deployment of Starwhale is called an instance. All instances can be managed by the Starwhale Client (swcli).

    You can start using Starwhale with one of the following instance types:

    • Starwhale Standalone - Rather than a running service, Starwhale Standalone is actually a repository that resides in your local file system. It is created and managed by the Starwhale Client (swcli). You only need to install swcli to use it. Currently, each user on a single machine can have only ONE Starwhale Standalone instance. We recommend you use the Starwhale Standalone to build and test your datasets, runtime, and models before pushing them to Starwhale Server/Cloud instances.
    • Starwhale Server - Starwhale Server is a service deployed on your local server. Besides text-only results from the Starwhale Client (swcli), Starwhale Server provides Web UI for you to manage your datasets and models, evaluate your models in your local Kubernetes cluster, and review the evaluation results.
    • Starwhale Cloud - Starwhale Cloud is a managed service hosted on public clouds. By registering an account on https://cloud.starwhale.cn, you are ready to use Starwhale without needing to install, operate, and maintain your own instances. Starwhale Cloud also provides public resources for you to download, like datasets, runtimes, and models. Check the "starwhale/public" project on Starwhale Cloud for more details.

    When choosing which instance type to use, consider the following:

    Instance TypeDeployment locationMaintained byUser InterfaceScalability
    Starwhale StandaloneYour laptop or any server in your data centerNot requiredCommand lineNot scalable
    Starwhale ServerYour data centerYourselfWeb UI and command lineScalable, depends on your Kubernetes cluster
    Starwhale CloudPublic cloud, like AWS or Aliyunthe Starwhale TeamWeb UI and command lineScalable, but currently limited by the freely available resource on the cloud
    - + \ No newline at end of file diff --git a/0.6.0/model/index.html b/0.6.0/model/index.html index e6152fca0..1f379dfb4 100644 --- a/0.6.0/model/index.html +++ b/0.6.0/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Model

    overview

    A Starwhale Model is a standard format for packaging machine learning models that can be used for various purposes, like model fine-tuning, model evaluation, and online serving. A Starwhale Model contains the model file, inference codes, configuration files, and any other files required to run the model.

    Create a Starwhale Model

    There are two ways to create a Starwhale Model: by swcli or by Python SDK.

    Create a Starwhale Model by swcli

    To create a Starwhale Model by swcli, you need to define a model.yaml, which describes some required information about the model package, and run the following command:

    swcli model build . --model-yaml /path/to/model.yaml

    For more information about the command and model.yaml, see the swcli reference. model.yaml is optional for model building.

    Create a Starwhale Model by Python SDK

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    Model Management

    Model Management by swcli

    CommandDescription
    swcli model listList all Starwhale Models in a project
    swcli model infoShow detail information about a Starwhale Model
    swcli model copyCopy a Starwhale Model to another location
    swcli model removeRemove a Starwhale Model
    swcli model recoverRecover a previously removed Starwhale Model

    Model Management by WebUI

    Model History

    Starwhale Models are versioned. The general rules about versions are described in Resource versioning in Starwhale.

    Model History Management by swcli

    CommandDescription
    swcli model historyList all versions of a Starwhale Model
    swcli model infoShow detail information about a Starwhale Model version
    swcli model diffCompare two versions of a Starwhale model
    swcli model copyCopy a Starwhale Model version to a new one
    swcli model removeRemove a Starwhale Model version
    swcli model recoverRecover a previously removed Starwhale Model version

    Model Evaluation

    Model Evaluation by swcli

    CommandDescription
    swcli model runCreate an evaluation with a Starwhale Model

    The Storage Format

    The Starwhale Model is a tarball file that contains the source directory.

    - + \ No newline at end of file diff --git a/0.6.0/model/yaml/index.html b/0.6.0/model/yaml/index.html index c263de44a..0710cf5ce 100644 --- a/0.6.0/model/yaml/index.html +++ b/0.6.0/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    The model.yaml Specification

    tip

    model.yaml is optional for swcli model build.

    When building a Starwhale Model using the swcli model build command, you can specify a yaml file that follows a specific format via the --model-yaml parameter to simplify specifying build parameters.

    Even without specifying the --model-yaml parameter, swcli model build will automatically look for a model.yaml file under the ${workdir} directory and extract parameters from it. Parameters specified on the swcli model build command line take precedence over equivalent configurations in model.yaml, so you can think of model.yaml as a file-based representation of the build command line.

    When building a Starwhale Model using the Python SDK, the model.yaml file does not take effect.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale Model, equivalent to --name parameter.NoString
    run.modulesPython Modules searched during model build, can specify multiple entry points for model execution, format is Python Importable path. Equivalent to --module parameter.YesList[String]
    run.handlerDeprecated alias of run.modules, can only specify one entry point.NoString
    versionmodel.yaml format version, currently only supports "1.0"NoString1.0
    descModel description, equivalent to --desc parameter.NoString

    Example


    name: helloworld

    run:
    modules:
    - src.evaluator

    desc: "example yaml"

    A Starwhale model named helloworld, searches for functions decorated with @evaluation.predict, @evaluation.evaluate or @handler, or classes inheriting from PipelineHandler in src/evaluator.py under ${WORKDIR} of the swcli model build command. These functions or classes will be added to the list of runnable entry points for the Starwhale model. When running the model via swcli model run or Web UI, select the corresponding entry point (handler) to run.

    model.yaml is optional, parameters defined in yaml can also be specified via swcli command line parameters.


    swcli model build . --model-yaml model.yaml

    Is equivalent to:


    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"

    - + \ No newline at end of file diff --git a/0.6.0/reference/sdk/dataset/index.html b/0.6.0/reference/sdk/dataset/index.html index 5c80b5af1..50556711c 100644 --- a/0.6.0/reference/sdk/dataset/index.html +++ b/0.6.0/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Dataset SDK

    dataset

    Get starwhale.Dataset object, by creating new datasets or loading existing datasets.

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    Parameters

    • uri: (str or Resource, required)
      • The dataset uri or Resource object.
    • create: (str, optional)
      • The mode of dataset creating. The options are auto, empty and forbid.
        • auto mode: If the dataset already exists, creation is ignored. If it does not exist, the dataset is created automatically.
        • empty mode: If the dataset already exists, an Exception is raised; If it does not exist, an empty dataset is created. This mode ensures the creation of a new, empty dataset.
        • forbid mode: If the dataset already exists, nothing is done. If it does not exist, an Exception is raised. This mode ensures the existence of the dataset.
      • The default is auto.
    • readonly: (bool, optional)
      • For an existing dataset, you can specify the readonly=True argument to ensure the dataset is in readonly mode.
      • Default is False.

    Examples

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # returns False, the "mnist" dataset does not exist yet.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset implements the abstraction of a Starwhale dataset, and can operate on datasets in Standalone/Server/Cloud instances.

    from_huggingface

    from_huggingface is a classmethod that can convert a Huggingface dataset into a Starwhale dataset.

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • dataset name.
    • repo: (str, required)
      • The huggingface datasets repo name.
    • subset: (str, optional)
      • The subset name. If the huggingface dataset has multiple subsets, you must specify the subset name.
    • split: (str, optional)
      • The split name. If the split name is not specified, all splits of the dataset will be built.
    • revision: (str, optional)
      • The huggingface datasets revision. The default value is main.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • cache: (bool, optional)
      • Whether to use huggingface dataset cache(download + local hf dataset).
      • The default value is True.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json is a classmethod that can convert a json text into a Starwhale dataset.

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • Dataset name.
    • json_text: (str, required)
      • A json string. The from_json function deserializes this string into Python objects to start building the Starwhale dataset.
    • field_selector: (str, optional)
      • The field from which you would like to extract dataset array items.
      • The default value is "" which indicates that the json object is an array containing all the items.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder is a classmethod that can read Image/Video/Audio data from a specified directory and automatically convert them into a Starwhale dataset. This function supports the following features:

    • It can recursively search the target directory and its subdirectories
    • Supports extracting three types of files:
      • image: Supports png/jpg/jpeg/webp/svg/apng image types. Image files will be converted to Starwhale.Image type.
      • video: Supports mp4/webm/avi video types. Video files will be converted to Starwhale.Video type.
      • audio: Supports mp3/wav audio types. Audio files will be converted to Starwhale.Audio type.
    • Each file corresponds to one record in the dataset, with the file stored in the file field.
    • If auto_label=True, the parent directory name will be used as the label for that record, stored in the label field. Files in the root directory will not be labeled.
    • If a txt file with the same name as an image/video/audio file exists, its content will be stored as the caption field in the dataset.
    • If metadata.csv or metadata.jsonl exists in the root directory, their content will be read automatically and associated to records by file path as meta information in the dataset.
      • metadata.csv and metadata.jsonl are mutually exclusive. An exception will be thrown if both exist.
      • Each record in metadata.csv and metadata.jsonl must contain a file_name field pointing to the file path.
      • metadata.csv and metadata.jsonl are optional for dataset building.
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • folder: (str|Path, required)
      • The folder path from which you would like to create this dataset.
    • kind: (str|DatasetFolderSourceType, required)
      • The dataset source type you would like to use, the choices are: image, video and audio.
      • Recursively searching for files of the specified kind in folder. Other file types will be ignored.
    • name: (str|Resource, optional)
      • The dataset name you would like to use.
      • If not specified, the name is the folder name.
    • auto_label: (bool, optional)
      • Whether to auto label by the sub-folder name.
      • The default value is True.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    • Example for the normal function calling

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • Example for caption

      folder/dog/1.png
      folder/dog/1.txt

      1.txt content will be used as the caption of 1.png.

    • Example for metadata

      metadata.csv:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • Example for auto-labeling

      The following structure will create a dataset with 2 labels: "cat" and "dog", 4 images in total.

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

    __iter__

    __iter__ is a method that iterates over the dataset rows.

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label and img are the features of mnist.
    print(item.features.img)

    batch_iter

    batch_iter is a method that iterates over the dataset rows in batches.

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    Parameters

    • batch_size: (int, optional)
      • batch size. The default value is 1.
    • drop_not_full: (bool, optional)
      • Whether to discard the last batch if its size is smaller than batch_size.
      • The default value is False.

    Examples

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ is a method that allows retrieving certain rows of data from the dataset, with usage similar to Python dict and list types.

    from starwhale import dataset

    ds = dataset("mock-str-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-int-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ is a method that allows updating rows of data in the dataset, with usage similar to Python dicts. __setitem__ supports multi-threaded parallel data insertion.

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    Parameters

    • key: (int|str, required)
      • key is the index for each row in the dataset. The type is int or str, but a dataset only accepts one type.
    • value: (DataRow|tuple|dict, required)
      • value is the features for each row in the dataset, using a Python dict is generally recommended.

    Examples

    • Normal insertion

    Insert two rows into the test dataset, with index test and test2 respectively:

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • Parallel insertion
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]
    for task in as_completed(tasks):
    task.result() # wait for all append tasks to finish before committing

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ is a method to delete certain rows of data from the dataset.

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append is a method to append data to a dataset, similar to the append method for Python lists.

    • Adding features dict, each row is automatically indexed with int starting from 0 and incrementing.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • By appending the index and features dictionary, the index of each data row in the dataset will not be handled automatically.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend is a method to bulk append data to a dataset, similar to the extend method for Python lists.

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit is a method that flushes the current cached data to storage when called, and generates a dataset version. This version can then be used to load the corresponding dataset content afterwards.

    For a dataset, if some data is added without calling commit, but close is called or the process exits directly instead, the data will still be written to the dataset, just without generating a new version.

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    Parameters

    • tags: (list(str), optional)
      • tag as a list
    • message: (str, optional)
      • commit message. The default value is empty.
    • force_add_tags: (bool, optional)
      • For server/cloud instances, when adding tags to this version, if a tag has already been applied to other dataset versions, you can use the force_add_tags=True parameter to forcibly add the tag to this version, otherwise an exception will be thrown.
      • The default is False.
    • ignore_add_tags_errors: (bool, optional)
      • Ignore any exceptions thrown when adding tags.
      • The default is False.

    Examples

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly is a property attribute indicating if the dataset is read-only, it returns a bool value.

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version is a property attribute, string type.

    • When loading an existing dataset, the loading_version is the related dataset version.
    • When creating a non-existed dataset, the loading_version is equal to the pending_commit_version.

    pending_commit_version

    pending_commit_version is a property attribute, string type. When you call the commit function, the pending_commit_version will be recorded in the Standalone, Server or Cloud instance.

    committed_version

    committed_version is a property attribute, string type. After the commit function is called, the committed_version will come out, it is equal to the pending_commit_version. Accessing this attribute without calling commit first will raise an exception.

    remove

    remove is a method equivalent to the swcli dataset remove command, it can delete a dataset.

    def remove(self, force: bool = False) -> None:

    recover

    recover is a method equivalent to the swcli dataset recover command, it can recover a soft-deleted dataset that has not been run garbage collection.

    def recover(self, force: bool = False) -> None:

    summary

    summary is a method equivalent to the swcli dataset summary command, it returns summary information of the dataset.

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history is a method equivalent to the swcli dataset history command, it returns the history records of the dataset.

    def history(self) -> t.List[t.Dict]:

    flush

    flush is a method that flushes temporarily cached data from memory to persistent storage. The commit and close methods will automatically call flush.

    close

    close is a method that closes opened connections related to the dataset. Dataset also implements contextmanager, so datasets can be automatically closed using with syntax without needing to explicitly call close.

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head

    head is a method to show the first n rows of a dataset, equivalent to the swcli dataset head command.

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> List[DataRow]:

    fetch_one

    fetch_one is a method to get the first record in a dataset, similar to head(n=1)[0].

    list

    list is a class method to list Starwhale datasets under a project URI, equivalent to the swcli dataset list command.

    @classmethod
    def list(
    cls,
    project_uri: Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[DatasetListType, Dict[str, Any]]:

    copy

    copy is a method to copy a dataset to another instance, equivalent to the swcli dataset copy command.

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • When copying a remote dataset to a local instance, this parameter can be used to specify the destination project URI.
    • force: (bool, optional)
      • Whether to forcibly overwrite the dataset if there is already one with the same version on the target instance.
      • The default value is False.
      • When the tags are already used for the other dataset version in the dest instance, you should use force option or adjust the tags.
    • mode: (str, optional)
      • Dataset copy mode, default is 'patch'. Mode choices are: 'patch', 'overwrite'.
      • patch: Patch mode, only update the changed rows and columns for the remote dataset.
      • overwrite: Overwrite mode, update records and delete extraneous rows from the remote dataset.
    • ignore_tags (List[str], optional)
      • Ignore tags when copying.
      • By default, the dataset is copied with all user custom tags.
      • latest and ^v\d+$ are the system builtin tags, they are ignored automatically.

    Examples

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch is a method that can convert a Starwhale dataset to a Pytorch torch.utils.data.Dataset, which can then be passed to torch.utils.data.DataLoader for use.

    It should be noted that the to_pytorch function returns a Pytorch IterableDataset.

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    Parameters

    • transform: (callable, optional)
      • A transform function for input data.
    • drop_index: (bool, optional)
      • Whether to drop the index column.
    • skip_default_transform: (bool, optional)
      • If transform is not set, by default the built-in Starwhale transform function will be used to transform the data. This can be disabled with the skip_default_transform parameter.

    Examples

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import typing as t
    import torch
    import torch.utils.data as tdata
    from starwhale import dataset, Text

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow is a method that can convert a Starwhale dataset to a Tensorflow tensorflow.data.Dataset.

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    Parameters

    • drop_index: (bool, optional)
      • Whether to drop the index column.

    Examples

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config is a method to set blob-related attributes in a Starwhale dataset. It needs to be called before making data changes.

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    Parameters

    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.

    Examples

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config is a method to set parameters for the Starwhale dataset loader process.

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    Parameters

    • num_workers: (int, optional)
      • The workers number for loading dataset.
      • The default value is 2.
    • cache_size: (int, optional)
      • Prefetched data rows.
      • The default value is 20.
    • field_transformer: (dict, optional)
      • features name transform dict.

    Examples

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/0.6.0/reference/sdk/evaluation/index.html b/0.6.0/reference/sdk/evaluation/index.html index c4de3a50f..2a45f4aee 100644 --- a/0.6.0/reference/sdk/evaluation/index.html +++ b/0.6.0/reference/sdk/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Model Evaluation SDK

    @evaluation.predict

    The @evaluation.predict decorator defines the inference process in the Starwhale Model Evaluation, similar to the map phase in MapReduce. It contains the following core features:

    • On the Server instance, require the resources needed to run.
    • Automatically read the local or remote datasets, and pass the data in the datasets one by one or in batches to the function decorated by evaluation.predict.
    • By the replicas setting, implement distributed dataset consumption to horizontally scale and shorten the time required for the model evaluation tasks.
    • Automatically store the return values of the function and the input features of the dataset into the results table, for display in the Web UI and further use in the evaluate phase.
    • The decorated function is called once for each single piece of data or each batch, to complete the inference process.

    Parameters

    • resources: (dict, optional)
      • Defines the resources required by each predict task when running on the Server instance, including memory, cpu, and nvidia.com/gpu.
      • memory: The unit is Bytes, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"memory": {"request": 100 * 1024, "limit": 200 * 1024}}.
        • If only a single number is set, the Python SDK will automatically set request and limit to the same value, e.g. resources={"memory": 100 * 1024} is equivalent to resources={"memory": {"request": 100 * 1024, "limit": 100 * 1024}}.
      • cpu: The unit is the number of CPU cores, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"cpu": {"request": 1, "limit": 2}}.
        • If only a single number is set, the SDK will automatically set request and limit to the same value, e.g. resources={"cpu": 1.5} is equivalent to resources={"cpu": {"request": 1.5, "limit": 1.5}}.
      • nvidia.com/gpu: The unit is the number of GPUs, int type is supported.
        • nvidia.com/gpu does not support setting request and limit, only a single number is supported.
      • Note: The resources parameter currently only takes effect on the Server instances. For the Cloud instances, the same can be achieved by selecting the corresponding resource pool when submitting the evaluation task. Standalone instances do not support this feature at all.
    • replicas: (int, optional)
      • The number of replicas to run predict.
      • predict defines a Step, in which there are multiple equivalent Tasks. Each Task runs on a Pod in Cloud/Server instances, and a Thread in Standalone instances.
      • When multiple replicas are specified, they are equivalent and will jointly consume the selected dataset to achieve distributed dataset consumption. It can be understood that a row in the dataset will only be read by one predict replica.
      • The default is 1.
    • batch_size: (int, optional)
      • Batch size for passing data from the dataset into the function.
      • The default is 1.
    • fail_on_error: (bool, optional)
      • Whether to interrupt the entire model evaluation when the decorated function throws an exception. If you expect some "exceptional" data to cause evaluation failures but don't want to interrupt the overall evaluation, you can set fail_on_error=False.
      • The default is True.
    • auto_log: (bool, optional)
      • Whether to automatically log the return values of the function and the input features of the dataset to the results table.
      • The default is True.
    • log_mode: (str, optional)
      • When auto_log=True, you can set log_mode to define logging the return values in plain or pickle format.
      • The default is pickle.
    • log_dataset_features: (List[str], optional)
      • When auto_log=True, you can selectively log certain features from the dataset via this parameter.
      • By default, all features will be logged.
    • needs: (List[Callable], optional)
      • Defines the prerequisites for this task to run, can use the needs syntax to implement DAG.
      • needs accepts functions decorated by @evaluation.predict, @evaluation.evaluate, and @handler.
      • The default is empty, i.e. does not depend on any other tasks.

    Input

    The decorated functions need to define some input parameters to accept dataset data, etc. They contain the following patterns:

    • data:

      • data is a dict type that can read the features of the dataset.
      • When batch_size=1 or batch_size is not set, the label feature can be read through data['label'] or data.label.
      • When batch_size is set to > 1, data is a list.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external:

      • data is a dict type that can read the features of the dataset.
      • external is also a dict, including: index, index_with_dataset, dataset_info, context and dataset_uri keys. The attributes can be used for the further fine-grained processing.
        • index: The index of the dataset row.
        • index_with_dataset: The index with the dataset info.
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo Class.
        • context: starwhale.Context Class.
        • dataset_uri: starwhale.base.uri.resource.Resource Class.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw:

      • data is a dict type that can read the features of the dataset.
      • kw is a dict that contains external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs:

      • The first argument of args list is data.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args:

      • *args does not contain external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    Examples

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "memory": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate is a decorator that defines the evaluation process in the Starwhale Model evaluation, similar to the reduce phase in MapReduce. It contains the following core features:

    • On the Server instance, apply for the resources.
    • Read the data recorded in the results table automatically during the predict phase, and pass it into the function as an iterator.
    • The evaluate phase will only run one replica, and cannot define the replicas parameter like the predict phase.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
      • In the common case, it will depend on a function decorated by @evaluation.predict.
    • use_predict_auto_log: (bool, optional)
      • Defaults to True, passes an iterator that can traverse the predict results to the function.

    Input

    • When use_predict_auto_log=True (default), pass an iterator that can traverse the predict results into the function.
      • The iterated object is a dictionary containing two keys: output and input.
        • output is the element returned by the predict stage function.
        • input is the features of the corresponding dataset during the inference process, which is a dictionary type.
    • When use_predict_auto_log=False, do not pass any parameters into the function.

    Examples

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    class Evaluation

    starwhale.Evaluation implements the abstraction for Starwhale Model Evaluation, and can perform operations like logging and scanning for Model Evaluation on Standalone/Server/Cloud instances, to record and retrieve metrics.

    __init__

    __init__ function initializes Evaluation object.

    class Evaluation
    def __init__(self, id: str, project: Project | str) -> None:

    Parameters

    • id: (str, required)
      • The UUID of Model Evaluation that is generated by Starwhale automatically.
    • project: (Project|str, required)
      • Project object or Project URI str.

    Example

    from starwhale import Evaluation

    standalone_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="self")
    server_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="cloud://server/project/starwhale:starwhale")
    cloud_e = Evaluation("2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/project/starwhale:llm-leaderboard")

    from_context

    from_context is a classmethod that obtains the Evaluation object under the current Context. from_context can only take effect under the task runtime environment. Calling this method in a non-task runtime environment will raise a RuntimeError exception, indicating that the Starwhale Context has not been properly set.

    @classmethod
    def from_context(cls) -> Evaluation:

    Example

    from starwhale import Evaluation

    with Evaluation.from_context() as e:
    e.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})

    log

    log is a method that logs evaluation metrics to a specific table, which can then be viewed on the Server/Cloud instance's web page or through the scan method.

    def log(
    self, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    Parameters

    • category: (str, required)
      • The category of the logged metrics, which will be used as the suffix of the Starwhale Datastore table name.
      • Each category corresponds to a Starwhale Datastore table. These tables will be isolated by the evaluation task ID and will not affect each other.
    • id: (str|int, required)
      • The ID of the logged record, unique within the table.
      • For the same table, only str or int can be used as the ID type.
    • metrics: (dict, required)
      • A dict to log metrics in key-value format.
      • Keys are of str type.
      • Values can be constant types like int, float, str, bytes, bool, or compound types like tuple, list, dict. It also supports logging Artifacts types like Starwhale.Image, Starwhale.Video, Starwhale.Audio, Starwhale.Text, Starwhale.Binary.
        • When the value contains dict type, the Starwhale SDK will automatically flatten the dict for better visualization and metric comparison.
        • For example, if metrics is {"test": {"loss": 0.99, "prob": [0.98,0.99]}, "image": [Image, Image]}, it will be stored as {"test/loss": 0.99, "test/prob": [0.98, 0.99], "image/0": Image, "image/1": Image} after flattening.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation.from_context()

    evaluation_store.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log("ppl", "1", {"a": "test", "b": 1})

    scan

    scan is a method that returns an iterator for reading data from certain model evaluation tables.

    def scan(
    self,
    category: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    Parameters

    • category: (str, required)
      • Same meaning as the category parameter in the log method.
    • start: (Any, optional)
      • Start key, if not specified, start from the first data item in the table.
    • end: (Any, optional)
      • End key, if not specified, iterate to the end of the table.
    • keep_none: (bool, optional)
      • Whether to return columns with None values, not returned by default.
    • end_inclusive: (bool, optional)
      • Whether to include the row corresponding to end, not included by default.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    results = [data for data in evaluation_store.scan("label/0")]

    flush

    flush is a method that can immediately flush the metrics logged by the log method to the datastore and oss storage. If the flush method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush(self, category: str, artifacts_flush: bool = True) -> None

    Parameters

    • category: (str, required)
      • Same meaning as the category parameter in the log method.
    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.

    log_result

    log_result is a method that logs evaluation metrics to the results table, equivalent to calling the log method with category set to results. The results table is generally used to store inference results. By default, @evaluation.predict will store the return value of the decorated function in the results table, you can also store records manually using log_result.

    def log_result(self, id: t.Union[str, int], metrics: t.Dict[str, t.Any]) -> None:

    Parameters

    • id: (str|int, required)
      • The ID of the record, unique within the results table.
      • For the results table, only str or int can be used as the ID type.
    • metrics: (dict, required)
      • Same definition as the metrics parameter in the log method.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")
    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})

    scan_results

    scan_results is a method that returns an iterator for reading data from the results table.

    def scan_results(
    self,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    Parameters

    • start: (Any, optional)
      • Start key, if not specified, start from the first data item in the table.
      • Same definition as the start parameter in the scan method.
    • end: (Any, optional)
      • End key, if not specified, iterate to the end of the table.
      • Same definition as the end parameter in the scan method.
    • keep_none: (bool, optional)
      • Whether to return columns with None values, not returned by default.
      • Same definition as the keep_none parameter in the scan method.
    • end_inclusive: (bool, optional)
      • Whether to include the row corresponding to end, not included by default.
      • Same definition as the end_inclusive parameter in the scan method.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")

    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})
    results = [data for data in evaluation_store.scan_results()]

    flush_results

    flush_results is a method that can immediately flush the metrics logged by the log_result method to the datastore and oss storage. If the flush_results method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_results(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    log_summary

    log_summary is a method that logs certain metrics to the summary table. The evaluation page on Server/Cloud instances displays data from the summary table.

    Each time it is called, Starwhale will automatically update with the unique ID of this evaluation as the row ID of the table. This function can be called multiple times during one evaluation to update different columns.

    Each project has one summary table. All evaluation tasks under that project will write summary information to this table for easy comparison between evaluations of different models.

    def log_summary(self, *args: t.Any, **kw: t.Any) -> None:

    Same as log method, log_summary will automatically flatten the dict.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")

    evaluation_store.log_summary(loss=0.99)
    evaluation_store.log_summary(loss=0.99, accuracy=0.99)
    evaluation_store.log_summary({"loss": 0.99, "accuracy": 0.99})

    get_summary

    get_summary is a method that returns the information logged by log_summary.

    def get_summary(self) -> t.Dict:

    flush_summary

    flush_summary is a method that can immediately flush the metrics logged by the log_summary method to the datastore and oss storage. If the flush_summary method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_summary(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    flush_all

    flush_all is a method that can immediately flush the metrics logged by the log, log_result, and log_summary methods to the datastore and oss storage. If the flush_all method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_all(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    get_tables

    get_tables is a method that returns the names of all tables generated during model evaluation. Note that this function does not return the summary table name.

    def get_tables(self) -> t.List[str]:

    close

    close is a method to close the Evaluation object. close will automatically flush data to storage when called. Evaluation also implements __enter__ and __exit__ methods, which can simplify manual close calls using with syntax.

    def close(self) -> None:

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    evaluation_store.log_summary(loss=0.99)
    evaluation_store.close()

    # auto close when the with-context exits.
    with Evaluation.from_context() as e:
    e.log_summary(loss=0.99)

    @handler

    @handler is a decorator that provides the following functionalities:

    • On a Server instance, it requests the required resources to run.
    • It can control the number of replicas.
    • Multiple handlers can form a DAG through dependency relationships to control the execution workflow.
    • It can expose ports externally to run like a web handler.

    @fine_tune, @evaluation.predict and @evaluation.evaluate can be considered applications of @handler in certain specific areas. @handler is the underlying implementation of these decorators and is more fundamental and flexible.

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
    • replicas: (int, optional)
      • Consistent with the replicas parameter definition in @evaluation.predict.
    • name: (str, optional)
      • The name displayed for the handler.
      • If not specified, use the decorated function's name.
    • expose: (int, optional)
      • The port exposed externally. When running a web handler, the exposed port needs to be declared.
      • The default is 0, meaning no port is exposed.
      • Currently only one port can be exposed.
    • require_dataset: (bool, optional)
      • Defines whether this handler requires a dataset when running.
      • If require_dataset=True, the user is required to input a dataset when creating an evaluation task on the Server/Cloud instance web page. If require_dataset=False, the user does not need to specify a dataset on the web page.
      • The default is False.

    Examples

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune is a decorator that defines the fine-tuning process for model training.

    Some restrictions and usage suggestions:

    • fine_tune has only one replica.
    • fine_tune requires dataset input.
    • Generally, the dataset is obtained through Context.get_runtime_context() at the start of fine_tune.
    • Generally, at the end of fine_tune, the fine-tuned Starwhale model package is generated through starwhale.model.build, which will be automatically copied to the corresponding evaluation project.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.

    Examples

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    The @multi_classification decorator uses the sklearn lib to analyze results for multi-classification problems, outputting the confusion matrix, ROC, AUC etc., and writing them to related tables in the Starwhale Datastore.

    When using it, certain requirements are placed on the return value of the decorated function, which should be (label, result) or (label, result, probability_matrix).

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    Parameters

    • confusion_matrix_normalize: (str, optional)
      • Accepts three parameters:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • Whether to calculate the Hamming loss.
      • The default is True.
    • show_cohen_kappa_score: (bool, optional)
      • Whether to calculate the Cohen kappa score.
      • The default is True.
    • show_roc_auc: (bool, optional)
      • Whether to calculate ROC/AUC. To calculate, the function needs to return a (label, result, probability_matrix) tuple, otherwise a (label, result) tuple is sufficient.
      • The default is True.
    • all_labels: (List, optional)
      • Defines all the labels.

    Examples


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    The PipelineHandler class provides a default model evaluation workflow definition that requires users to implement the predict and evaluate functions.

    The PipelineHandler is equivalent to using the @evaluation.predict and @evaluation.evaluate decorators together - the usage looks different but the underlying model evaluation process is the same.

    Note that PipelineHandler currently does not support defining resources parameters.

    Users need to implement the following functions:

    • predict: Defines the inference process, equivalent to a function decorated with @evaluation.predict.

    • evaluate: Defines the evaluation process, equivalent to a function decorated with @evaluation.evaluate.

    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    Parameters

    • predict_batch_size: (int, optional)
      • Equivalent to the batch_size parameter in @evaluation.predict.
      • Default is 1.
    • ignore_error: (bool, optional)
      • Equivalent to the fail_on_error parameter in @evaluation.predict.
      • Default is False.
    • predict_auto_log: (bool, optional)
      • Equivalent to the auto_log parameter in @evaluation.predict.
      • Default is True.
    • predict_log_mode: (str, optional)
      • Equivalent to the log_mode parameter in @evaluation.predict.
      • Default is pickle.
    • predict_log_dataset_features: (bool, optional)
      • Equivalent to the log_dataset_features parameter in @evaluation.predict.
      • Default is None, which records all features.

    PipelineHandler.run Decorator

    The PipelineHandler.run decorator can be used to describe resources for the predict and evaluate methods, supporting definitions of replicas and resources:

    • The PipelineHandler.run decorator can only decorate predict and evaluate methods in subclasses inheriting from PipelineHandler.
    • The predict method can set the replicas parameter. The replicas value for the evaluate method is always 1.
    • The resources parameter is defined and used in the same way as the resources parameter in @evaluation.predict or @evaluation.evaluate.
    • The PipelineHandler.run decorator is optional.
    • The PipelineHandler.run decorator only takes effect on Server and Cloud instances, not Standalone instances that don't support resource definition.
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    Examples

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    The context information passed during model evaluation, including Project, Task ID, etc. The Context content is automatically injected and can be used in the following ways:

    • Inherit the PipelineHandler class and use the self.context object.
    • Get it through Context.get_runtime_context().

    Note that Context can only be used during model evaluation, otherwise the program will throw an exception.

    Currently Context can get the following values:

    • project: str
      • Project name.
    • version: str
      • Unique ID of model evaluation.
    • step: str
      • Step name.
    • total: int
      • Total number of Tasks under the Step.
    • index: int
      • Task index number, starting from 0.
    • dataset_uris: List[str]
      • List of Starwhale dataset URIs.

    Examples


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api is a decorator that provides a simple Web Handler input definition based on Gradio for accepting external requests and returning inference results to the user when launching a Web Service with the swcli model serve command, enabling online evaluation.

    Examples

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    If you want to customize the web service implementation, you can subclass Service and override the serve method.

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    Notes:

    • Handlers added with PipelineHandler.add_api and the api decorator or Service.api can work together
    • If using a custom Service, you need to instantiate the custom Service class in the model

    Custom Request and Response

    Request and Response are handler preprocessing and postprocessing classes for receiving user requests and returning results. They can be simply understood as pre and post logic for the handler.

    Starwhale provides built-in Request implementations for Dataset types and Json Response. Users can also customize the logic as follows:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req

    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/0.6.0/reference/sdk/job/index.html b/0.6.0/reference/sdk/job/index.html index aef47cc44..4f3e1f065 100644 --- a/0.6.0/reference/sdk/job/index.html +++ b/0.6.0/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Job SDK

    job

    Get a starwhale.Job object through the Job URI parameter, which represents a Job on Standalone/Server/Cloud instances.

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    Parameters

    • uri: (str, required)
      • Job URI format.

    Usage Example

    from starwhale import job

    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")

    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job abstracts Starwhale Job and enables some information retrieval operations on the job.

    list

    list is a classmethod that can list the jobs under a project.

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[List[Job], Dict]:

    Parameters

    • project: (str, optional)
      • Project URI, can be projects on Standalone/Server/Cloud instances.
      • If project is not specified, the project selected by swcli project select will be used.
    • page_index: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the page number.
        • Default is 1.
        • Page numbers start from 1.
      • Standalone instances do not support paging. This parameter has no effect.
    • page_size: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the number of jobs returned per page.
        • Defaults to DEFAULT_PAGE_SIZE, as shown in the method signature. (NOTE: the original text repeated the page_index defaults here; confirm the concrete default page size against the SDK source.)
      • Standalone instances do not support paging. This parameter has no effect.

    Usage Example

    from starwhale import Job

    # list jobs of current selected project
    jobs, pagination_info = Job.list()

    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")

    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get is a classmethod that gets information about a specific job and returns a Starwhale.Job object. It has the same functionality and parameter definitions as the starwhale.job function.

    Usage Example

    from starwhale import Job

    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")

    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary is a property that returns the data written to the summary table during the job execution, in dict type.

    @property
    def summary(self) -> Dict[str, Any]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.summary)

    tables

    tables is a property that returns the names of tables created during the job execution (not including the summary table, which is created automatically at the project level), in list type.

    @property
    def tables(self) -> List[str]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.tables)

    get_table_rows

    get_table_rows is a method that returns records from a data table according to the table name and other parameters, in iterator type.

    def get_table_rows(
    self,
    name: str,
    start: Any = None,
    end: Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> Iterator[Dict[str, Any]]:

    Parameters

    • name: (str, required)
      • Datastore table name. Any of the table names obtained through the tables property can be used.
    • start: (Any, optional)
      • The starting ID value of the returned records.
      • Default is None, meaning start from the beginning of the table.
    • end: (Any, optional)
      • The ending ID value of the returned records.
      • Default is None, meaning until the end of the table.
      • If both start and end are None, all records in the table will be returned as an iterator.
    • keep_none: (bool, optional)
      • Whether to return records with None values.
      • Default is False.
    • end_inclusive: (bool, optional)
      • When end is set, whether the iteration includes the end record.
      • Default is False.

    Usage Example

    from starwhale import job

    j = job("local/project/self/job/xm5wnup")

    table_name = j.tables[0]

    for row in j.get_table_rows(table_name):
    print(row)

    rows = list(j.get_table_rows(table_name, start=0, end=100))

    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]

    status

    status is a property that returns the current real-time state of the Job as a string. The possible states are CREATED, READY, PAUSED, RUNNING, CANCELLING, CANCELED, SUCCESS, FAIL, and UNKNOWN.

    @property
    def status(self) -> str:

    create

    create is a classmethod that can create tasks on a Standalone instance or Server/Cloud instance, including tasks for Model Evaluation, Fine-tuning, Online Serving, and Developing. The function returns a Job object.

    • create determines which instance the generated task runs on through the project parameter, including Standalone and Server/Cloud instances.
    • On a Standalone instance, create creates a synchronously executed task.
    • On a Server/Cloud instance, create creates an asynchronously executed task.
    @classmethod
    def create(
    cls,
    project: Project | str,
    model: Resource | str,
    run_handler: str,
    datasets: t.List[str | Resource] | None = None,
    runtime: Resource | str | None = None,
    resource_pool: str = DEFAULT_RESOURCE_POOL,
    ttl: int = 0,
    dev_mode: bool = False,
    dev_mode_password: str = "",
    dataset_head: int = 0,
    overwrite_specs: t.Dict[str, t.Any] | None = None,
    ) -> Job:

    Parameters

    Parameters apply to all instances:

    • project: (Project|str, required)
      • A Project object or Project URI string.
    • model: (Resource|str, required)
      • Model URI string or Resource object of Model type, representing the Starwhale model package to run.
    • run_handler: (str, required)
      • The name of the runnable handler in the Starwhale model package, e.g. the evaluate handler of mnist: mnist.evaluator:MNISTInference.evaluate.
    • datasets: (List[str | Resource], optional)
      • Datasets required for the Starwhale model package to run, not required.

    Parameters only effective for Standalone instances:

    • dataset_head: (int, optional)
      • Generally used for debugging scenarios, only uses the first N data in the dataset for the Starwhale model to consume.

    Parameters only effective for Server/Cloud instances:

    • runtime: (Resource | str, optional)
      • Runtime URI string or Resource object of Runtime type, representing the Starwhale runtime required to run the task.
      • When not specified, it will try to use the built-in runtime of the Starwhale model package.
      • When creating tasks under a Standalone instance, the Python interpreter environment used by the Python script is used as its own runtime. Specifying a runtime via the runtime parameter is not supported. If you need to specify a runtime, you can use the swcli model run command.
    • resource_pool: (str, optional)
      • Specify which resource pool the task runs in, default to the default resource pool.
    • ttl: (int, optional)
      • Maximum lifetime of the task, will be killed after timeout.
      • The unit is seconds.
      • By default, ttl is 0, meaning no timeout limit, and the task will run as expected.
      • When ttl is less than 0, it also means no timeout limit.
    • dev_mode: (bool, optional)
      • Whether to set debug mode. After turning on this mode, you can enter the related environment through VSCode Web.
      • Debug mode is off by default.
    • dev_mode_password: (str, optional)
      • Login password for VSCode Web in debug mode.
      • Default is empty, in which case the task's UUID will be used as the password, which can be obtained via job.info().job.uuid.
    • overwrite_specs: (Dict[str, Any], optional)
      • Support setting the replicas and resources fields of the handler.
      • If empty, use the values set in the corresponding handler of the model package.
      • The key of overwrite_specs is the name of the handler, e.g. the evaluate handler of mnist: mnist.evaluator:MNISTInference.evaluate.
      • The value of overwrite_specs is the set value, in dictionary format, supporting settings for replicas and resources, e.g. {"replicas": 1, "resources": {"memory": "1GiB"}}.

    Examples

    • create a Cloud Instance job
    from starwhale import Job
    project = "https://cloud.starwhale.cn/project/starwhale:public"
    job = Job.create(
    project=project,
    model=f"{project}/model/mnist/version/v0",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=[f"{project}/dataset/mnist/version/v0"],
    runtime=f"{project}/runtime/pytorch",
    overwrite_specs={"mnist.evaluator:MNISTInference.evaluate": {"resources": "4GiB"},
    "mnist.evaluator:MNISTInference.predict": {"resources": "8GiB", "replicas": 10}}
    )
    print(job.status)
    • create a Standalone Instance job
    from starwhale import Job
    job = Job.create(
    project="self",
    model="mnist",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=["mnist"],
    )
    print(job.status)
    - + \ No newline at end of file diff --git a/0.6.0/reference/sdk/model/index.html b/0.6.0/reference/sdk/model/index.html index 4b848d8b6..339703ca6 100644 --- a/0.6.0/reference/sdk/model/index.html +++ b/0.6.0/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Model SDK

    model.build

    model.build is a function that can build the Starwhale model, equivalent to the swcli model build command.

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • modules: (List[str|object], optional)
      • The search modules supports object(function, class or module) or str(example: "to.path.module", "to.path.module:object").
      • If the argument is not specified, the search modules are the imported modules.
    • name: (str, optional)
      • Starwhale Model name.
      • The default is the current work dir (cwd) name.
    • workdir: (str, Pathlib.Path, optional)
      • The path of the rootdir. The default workdir is the current working dir.
      • All files in the workdir will be packaged. If you want to ignore some files, you can add .swignore file in the workdir.
    • project_uri: (str, optional)
      • The project uri of the Starwhale Model.
      • If the argument is not specified, the project_uri is the config value of swcli project select command.
    • desc: (str, optional)
      • The description of the Starwhale Model.
    • remote_project_uri: (str, optional)
      • Project URI of another instance. After the Starwhale model is built, it will be automatically copied to that remote instance.
    • add_all: (bool, optional)
      • Add all files in the working directory to the model package (excludes Python cache files and virtual environment files when disabled). The .swignore file still takes effect.
      • The default value is False.
    • tags: (List[str], optional)
      • The tags for the model version.
      • latest and ^v\d+$ tags are reserved tags.

    Examples

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/0.6.0/reference/sdk/other/index.html b/0.6.0/reference/sdk/other/index.html index a5d5b6641..ffc5dc314 100644 --- a/0.6.0/reference/sdk/other/index.html +++ b/0.6.0/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Other SDK

    __version__

    Version of Starwhale Python SDK and swcli, string constant.

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    Initialize Starwhale logger and traceback. The default value is 0.

    • 0: show only errors, traceback only shows 1 frame.
    • 1: show errors + warnings, traceback shows 5 frames.
    • 2: show errors + warnings + info, traceback shows 10 frames.
    • 3: show errors + warnings + info + debug, traceback shows 100 frames.
    • >=4: show errors + warnings + info + debug + trace, traceback shows 1000 frames.
    def init_logger(verbose: int = 0) -> None:

    login

    Log in to a server/cloud instance. It is equivalent to running the swcli instance login command. Logging in to a Standalone instance is meaningless.

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    Parameters

    • instance: (str, required)
      • The http url of the server/cloud instance.
    • alias: (str, optional)
      • An alias for the instance to simplify the instance part of the Starwhale URI.
      • If not specified, the hostname part of the instance http url will be used.
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • You can only choose one of username + password or token to login to the instance.

    Examples

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    Log out of a server/cloud instance. It is equivalent to running the swcli instance logout command. Logging out of a Standalone instance is meaningless.

    def logout(instance: str) -> None:

    Examples

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/0.6.0/reference/sdk/overview/index.html b/0.6.0/reference/sdk/overview/index.html index 856cd97cb..c77da8959 100644 --- a/0.6.0/reference/sdk/overview/index.html +++ b/0.6.0/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Python SDK Overview

    Starwhale provides a series of Python SDKs to help manage datasets, models, evaluations etc. Using the Starwhale Python SDK can make it easier to complete your ML/DL development tasks.

    Classes

    • PipelineHandler: Provides default model evaluation process definition, requires implementation of predict and evaluate methods.
    • Context: Passes context information during model evaluation, including Project, Task ID etc.
    • class Dataset: Starwhale Dataset class.
    • class starwhale.api.service.Service: The base class of online evaluation.
    • class Job: Starwhale Job class.
    • class Evaluation: Starwhale Evaluation class.

    Functions

    • @multi_classification: Decorator for multi-class problems to simplify evaluate result calculation and storage for better evaluation presentation.
    • @handler: Decorator to define a running entity with resource attributes (mem/cpu/gpu). You can control replica count. Handlers can form DAGs through dependencies to control execution flow.
    • @evaluation.predict: Decorator to define inference process in model evaluation, similar to map phase in MapReduce.
    • @evaluation.evaluate: Decorator to define evaluation process in model evaluation, similar to reduce phase in MapReduce.
    • model.build: Build Starwhale model.
    • @fine_tune: Decorator to define model fine-tuning process.
    • init_logger: Set log level, implement 5-level logging.
    • dataset: Get starwhale.Dataset object, by creating new datasets or loading existing datasets.
    • @starwhale.api.service.api: Decorator to provide a simple Web Handler input definition based on Gradio.
    • login: Log in to the server/cloud instance.
    • logout: Log out of the server/cloud instance.
    • job: Get starwhale.Job object by the Job URI.
    • @PipelineHandler.run: Decorator to define the resources for the predict and evaluate methods in PipelineHandler subclasses.

    Data Types

    • COCOObjectAnnotation: Provides COCO format definitions.
    • BoundingBox: Bounding box type, currently in LTWH format - left_x, top_y, width and height.
    • ClassLabel: Describes the number and types of labels.
    • Image: Image type.
    • GrayscaleImage: Grayscale image type, e.g. MNIST digit images, a special case of Image type.
    • Audio: Audio type.
    • Video: Video type.
    • Text: Text type, default utf-8 encoding, for storing large texts.
    • Binary: Binary type, stored in bytes, for storing large binary content.
    • Line: Line type.
    • Point: Point type.
    • Polygon: Polygon type.
    • Link: Link type, for creating remote-link data.
    • MIMEType: Describes multimedia types supported by Starwhale, used in mime_type attribute of Image, Video etc for better Dataset Viewer.

    Other

    • __version__: Version of Starwhale Python SDK and swcli, string constant.

    Further reading

    - + \ No newline at end of file diff --git a/0.6.0/reference/sdk/type/index.html b/0.6.0/reference/sdk/type/index.html index a223df60d..24cf3e949 100644 --- a/0.6.0/reference/sdk/type/index.html +++ b/0.6.0/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Data Types

    COCOObjectAnnotation

    It provides definitions following the COCO format.

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    ParameterDescription
    idObject id, usually a globally incrementing id
    image_idImage id, usually id of the image
    category_idCategory id, usually id of the class in object detection
    segmentationObject contour representation, Polygon (polygon vertices) or RLE format
    areaObject area
    bboxRepresents bounding box, can be BoundingBox type or list of floats
    iscrowd0 indicates a single object, 1 indicates two unseparated objects

    Examples

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    GrayscaleImage provides a grayscale image type. It is a special case of the Image type, for example the digit images in MNIST.

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    ParameterDescription
    fpImage path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    shapeImage width and height, default channel is 1
    as_maskWhether used as a mask image
    mask_uriURI of the original image for the mask

    Examples

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage Functions

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    BoundingBox provides a bounding box type, currently in LTWH format:

    • left_x: x-coordinate of left edge
    • top_y: y-coordinate of top edge
    • width: width of bounding box
    • height: height of bounding box

    So it represents the bounding box using the coordinates of its left, top, width and height. This is a common format for specifying bounding boxes in computer vision tasks.

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    ParameterDescription
    xx-coordinate of left edge (left_x)
    yy-coordinate of top edge (top_y)
    widthWidth of bounding box
    heightHeight of bounding box

    ClassLabel

    Describe labels.

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    Image Type.

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    ParameterDescription
    fpImage path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    shapeImage width, height and channels
    mime_typeMIMEType supported types
    as_maskWhether used as a mask image
    mask_uriURI of the original image for the mask

    The main difference from GrayscaleImage is that Image supports multi-channel RGB images by specifying shape as (W, H, C).

    Examples

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image Functions

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    Video type.

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    ParameterDescription
    fpVideo path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    mime_typeMIMEType supported types

    Examples

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    Audio type.

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    ParameterDescription
    fpAudio path, IO object, or file content bytes
    display_nameDisplay name shown in Dataset Viewer
    mime_typeMIMEType supported types

    Examples

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio Functions

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    Text type, the default encode type is utf-8.

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    ParameterDescription
    contentThe text content
    encodingEncoding format of the text

    Examples

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text Functions

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    Binary provides a binary data type, stored as bytes.

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    ParameterDescription
    fpPath, IO object, or file content bytes
    mime_typeMIMEType supported types

    Binary Functions

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link provides a link type to create remote-link datasets in Starwhale.

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    ParameterDescription
    uriURI of the original data, currently supports localFS and S3 protocols
    authLink auth information
    offsetData offset relative to file pointed by uri
    sizeData size
    data_typeActual data type pointed by the link, currently supports Binary, Image, Text, Audio and Video

    Link.astype

    astype() -> Dict[str, t.Any]

    MIMEType

    MIMEType describes the multimedia types supported by Starwhale, implemented using Python Enum. It is used in the mime_type attribute of Image, Video etc to enable better Dataset Viewer support.

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    Line

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/dataset/index.html b/0.6.0/reference/swcli/dataset/index.html index 90717c011..304d27cd1 100644 --- a/0.6.0/reference/swcli/dataset/index.html +++ b/0.6.0/reference/swcli/dataset/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    swcli dataset

    Overview

    swcli [GLOBAL OPTIONS] dataset [OPTIONS] <SUBCOMMAND> [ARGS]...

    The dataset command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • head
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • summary
    • tag

    swcli dataset build

    swcli [GLOBAL OPTIONS] dataset build [OPTIONS]

    Build a Starwhale Dataset. This command only supports building datasets on a Standalone instance.

    Options

    • Data sources options:
    OptionRequiredTypeDefaultsDescription
    -if or --image or --image-folderNStringBuild dataset from image folder, the folder should contain the image files.
    -af or --audio or --audio-folderNStringBuild dataset from audio folder, the folder should contain the audio files.
    -vf or --video or --video-folderNStringBuild dataset from video folder, the folder should contain the video files.
    -h or --handler or --python-handlerNStringBuild dataset from python executor handler, the handler format is [module path]:[class or func name].
    -f or --yaml or --dataset-yamlNdataset.yaml in cwdBuild dataset from dataset.yaml file. Default uses dataset.yaml in the work directory(cwd).
    -jf or --jsonNStringBuild dataset from json or jsonl file, the json or jsonl file option is a json file path or a http downloaded url.The json content structure should be a list[dict] or tuple[dict].
    -hf or --huggingfaceNStringBuild dataset from huggingface dataset, the huggingface option is a huggingface repo name.
    -c or --csvNStringBuild dataset from csv files. The option is a csv file path, dir path or a http downloaded url.The option can be used multiple times.

    Data source options are mutually exclusive; only one option is accepted. If not set, the swcli dataset build command will use the dataset yaml mode, building the dataset from the dataset.yaml file in the cwd.

    • Other options:
    OptionRequiredScopeTypeDefaultsDescription
    -pt or --patchone of --patch and --overwriteGlobalBooleanTruePatch mode, only update the changed rows and columns for the existed dataset.
    -ow or --overwriteone of --patch and --overwriteGlobalBooleanFalseOverwrite mode, update records and delete extraneous rows from the existed dataset.
    -n or --nameNGlobalStringDataset name
    -p or --projectNGlobalStringDefault projectProject URI, the default is the current selected project. The dataset will store in the specified project.
    -d or --descNGlobalStringDataset description
    -as or --alignment-sizeNGlobalString128Bswds-bin format dataset: alignment size
    -vs or --volume-sizeNGlobalString64MBswds-bin format dataset: volume size
    -r or --runtimeNGlobalStringRuntime URI
    -w or --workdirNPython Handler ModeStringcwdwork dir to search handler.
    --auto-label/--no-auto-labelNImage/Video/Audio Folder ModeBooleanTrueWhether to auto label by the sub-folder name.
    --field-selectorNJSON File ModeStringThe field from which you would like to extract dataset array items. The field path is split by the dot(.) symbol.
    --subsetNHuggingface ModeStringHuggingface dataset subset name. If the subset name is not specified, all subsets will be built.
    --splitNHuggingface ModeStringHuggingface dataset split name. If the split name is not specified, all splits will be built.
    --revisionNHuggingface ModeStringmainVersion of the dataset script to load. Defaults to 'main'. The option value accepts tag name, or branch name, or commit hash.
    --add-hf-info/--no-add-hf-infoNHuggingface ModeBooleanTrueWhether to add huggingface dataset info to the dataset rows, currently support to add subset and split into the dataset rows. Subset uses _hf_subset field name, split uses _hf_split field name.
    --cache/--no-cacheNHuggingface ModeBooleanTrueWhether to use huggingface dataset cache(download + local hf dataset).
    -t or --tagNGlobalStringDataset tags, the option can be used multiple times.
    --encodingNCSV/JSON/JSONL ModeStringfile encoding.
    --dialectNCSV ModeStringexcelThe csv file dialect, the default is excel. Current supports excel, excel-tab and unix formats.
    --delimiterNCSV ModeString,A one-character string used to separate fields for the csv file.
    --quotecharNCSV ModeString"A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters.
    --skipinitialspace/--no-skipinitialspaceNCSV ModeBoolFalseWhether to skip spaces after delimiter for the csv file.
    --strict/--no-strictNCSV ModeBoolFalseWhen True, raise exception Error if the csv is not well formed.

    Examples for dataset building

    #- from dataset.yaml
    swcli dataset build # build dataset from dataset.yaml in the current work directory(pwd)
    swcli dataset build --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, all the involved files are related to the dataset.yaml file.
    swcli dataset build --overwrite --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, and overwrite the existed dataset.
    swcli dataset build --tag tag1 --tag tag2

    #- from handler
    swcli dataset build --handler mnist.dataset:iter_mnist_item # build dataset from mnist.dataset:iter_mnist_item handler, the workdir is the current work directory(pwd).
    # build dataset from mnist.dataset:LinkRawDatasetProcessExecutor handler, the workdir is example/mnist
    swcli dataset build --handler mnist.dataset:LinkRawDatasetProcessExecutor --workdir example/mnist

    #- from image folder
    swcli dataset build --image-folder /path/to/image/folder # build dataset from /path/to/image/folder, search all image type files.

    #- from audio folder
    swcli dataset build --audio-folder /path/to/audio/folder # build dataset from /path/to/audio/folder, search all audio type files.

    #- from video folder
    swcli dataset build --video-folder /path/to/video/folder # build dataset from /path/to/video/folder, search all video type files.

    #- from json/jsonl file
    swcli dataset build --json /path/to/example.json
    swcli dataset build --json http://example.com/example.json
    swcli dataset build --json /path/to/example.json --field-selector a.b.c # extract the json_content["a"]["b"]["c"] field from the json file.
    swcli dataset build --name qald9 --json https://raw.githubusercontent.com/ag-sc/QALD/master/9/data/qald-9-test-multilingual.json --field-selector questions
    swcli dataset build --json /path/to/test01.jsonl --json /path/to/test02.jsonl
    swcli dataset build --json https://modelscope.cn/api/v1/datasets/damo/100PoisonMpts/repo\?Revision\=master\&FilePath\=train.jsonl

    #- from huggingface dataset
    swcli dataset build --huggingface mnist
    swcli dataset build -hf mnist --no-cache
    swcli dataset build -hf cais/mmlu --subset anatomy --split auxiliary_train --revision 7456cfb

    #- from csv files
    swcli dataset build --csv /path/to/example.csv
    swcli dataset build --csv /path/to/example.csv --csv /path/to/example2.csv
    swcli dataset build --csv /path/to/csv-dir
    swcli dataset build --csv http://example.com/example.csv
    swcli dataset build --name product-desc-modelscope --csv https://modelscope.cn/api/v1/datasets/lcl193798/product_description_generation/repo\?Revision\=master\&FilePath\=test.csv --encoding=utf-8-sig

    swcli dataset copy

    swcli [GLOBAL OPTIONS] dataset copy [OPTIONS] <SRC> <DEST>

    dataset copy copies from SRC to DEST.

    SRC and DEST are both dataset URIs.

    When copying a Starwhale Dataset, all custom user-defined tags will be copied by default. You can use the --ignore-tag parameter to ignore certain tags. In addition, the latest and ^v\d+$ tags are Starwhale built-in tags that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the tags carried during copying have already been used by other versions, this parameter can be used to forcibly update the tags to this version.
    -p or --patchone of --patch and --overwriteBooleanTruePatch mode, only update the changed rows and columns for the remote dataset.
    -o or --overwriteone of --patch and --overwriteBooleanFalseOverwrite mode, update records and delete extraneous rows from the remote dataset.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for dataset copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a new dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp --patch cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with a dataset name 'mnist-local'
    swcli dataset cp --overwrite cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with a new dataset name 'mnist-cloud'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli dataset cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp local/project/myproject/dataset/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli dataset cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1 --force

    swcli dataset diff

    swcli [GLOBAL OPTIONS] dataset diff [OPTIONS] <DATASET VERSION> <DATASET VERSION>

    dataset diff compares the difference between two versions of the same dataset.

    DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.
    swcli dataset head

    swcli [GLOBAL OPTIONS] dataset head [OPTIONS] <DATASET VERSION>

    Print the first n rows of the dataset. DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    -n or --rowsNInt5Print the first NUM rows of the dataset.
    -srd or --show-raw-dataNBooleanFalseFetch raw data content from objectstore.
    -st or --show-typesNBooleanFalseShow data types.

    Examples for dataset head

    #- print the first 5 rows of the mnist dataset
    swcli dataset head -n 5 mnist

    #- print the first 10 rows of the mnist(v0 version) dataset and show raw data
    swcli dataset head -n 10 mnist/v0 --show-raw-data

    #- print the data types of the mnist dataset
    swcli dataset head mnist --show-types

    #- print the remote cloud dataset's first 5 rows
    swcli dataset head cloud://cloud-cn/project/test/dataset/mnist -n 5

    #- print the first 5 rows in the json format
    swcli -o json dataset head -n 5 mnist

    swcli dataset history

    swcli [GLOBAL OPTIONS] dataset history [OPTIONS] <DATASET>

    dataset history outputs all history versions of the specified Starwhale Dataset.

    DATASET is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli dataset info

    swcli [GLOBAL OPTIONS] dataset info [OPTIONS] <DATASET>

    dataset info outputs detailed information about the specified Starwhale Dataset version.

    DATASET is a dataset URI.

    swcli dataset list

    swcli [GLOBAL OPTIONS] dataset list [OPTIONS]

    dataset list shows all Starwhale Datasets.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include datasets that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Datasets that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of datasets--filter name=mnist
    ownerKey-ValueThe dataset owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli dataset recover

    swcli [GLOBAL OPTIONS] dataset recover [OPTIONS] <DATASET>

    dataset recover recovers previously removed Starwhale Datasets or versions.

    DATASET is a dataset URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Datasets or versions can not be recovered, nor can those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Dataset or version with the same name or version id.

    swcli dataset remove

    swcli [GLOBAL OPTIONS] dataset remove [OPTIONS] <DATASET>

    dataset remove removes the specified Starwhale Dataset or version.

    DATASET is a dataset URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Datasets or versions can be recovered by swcli dataset recover before garbage collection. Use the --force option to persistently remove a Starwhale Dataset or version.

    Removed Starwhale Datasets or versions can be listed by swcli dataset list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Dataset or version. It can not be recovered.

    swcli dataset summary

    swcli [GLOBAL OPTIONS] dataset summary <DATASET>

    Show dataset summary. DATASET is a dataset URI.

    swcli dataset tag

    swcli [GLOBAL OPTIONS] dataset tag [OPTIONS] <DATASET> [TAGS]...

    dataset tag attaches a tag to a specified Starwhale Dataset version. At the same time, tag command also supports list and remove tags. The tag can be used in a dataset URI instead of the version id.

    DATASET is a dataset URI.

    Each dataset version can have any number of tags, but duplicated tag names are not allowed in the same dataset.

    dataset tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding tags to server/cloud instances, if the tag is already used by another dataset version, an error will be reported. In this case, you can force an update using the --force-add parameter.

    Examples for dataset tag

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/index.html b/0.6.0/reference/swcli/index.html index c73e2f96b..3612faaf4 100644 --- a/0.6.0/reference/swcli/index.html +++ b/0.6.0/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Overview

    Usage

    swcli [OPTIONS] <COMMAND> [ARGS]...
    note

    sw and starwhale are aliases for swcli.

    Global Options

    OptionDescription
    --versionShow the Starwhale Client version
    -v or --verboseShow verbose log, support multi counts for -v args. More -v args, more logs.
    --helpShow the help message.
    caution

    Global options must be put immediately after swcli, and before any command.

    Commands

    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/instance/index.html b/0.6.0/reference/swcli/instance/index.html index 8dbfe47c6..af76b605c 100644 --- a/0.6.0/reference/swcli/instance/index.html +++ b/0.6.0/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    swcli instance

    Overview

    swcli [GLOBAL OPTIONS] instance [OPTIONS] <SUBCOMMAND> [ARGS]

    The instance command includes the following subcommands:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [GLOBAL OPTIONS] instance info [OPTIONS] <INSTANCE>

    instance info outputs detailed information about the specified Starwhale Instance.

    INSTANCE is an instance URI.

    swcli instance list

    swcli [GLOBAL OPTIONS] instance list [OPTIONS]

    instance list shows all Starwhale Instances.

    swcli instance login

    swcli [GLOBAL OPTIONS] instance login [OPTIONS] <INSTANCE>

    instance login connects to a Server/Cloud instance and makes the specified instance default.

    INSTANCE is an instance URI.

    OptionRequiredTypeDefaultsDescription
    --usernameNStringThe login username.
    --passwordNStringThe login password.
    --tokenNStringThe login token.
    --aliasYStringThe alias of the instance. You can use it anywhere that requires an instance URI.

    --username and --password can not be used together with --token.

    swcli instance logout

    swcli [GLOBAL OPTIONS] instance logout [INSTANCE]

    instance logout disconnects from the Server/Cloud instance, and clears information stored in the local storage.

    INSTANCE is an instance URI. If it is omitted, the default instance is used instead.

    swcli instance use

    swcli [GLOBAL OPTIONS] instance use <INSTANCE>

    instance use makes the specified instance default.

    INSTANCE is an instance URI.

    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/job/index.html b/0.6.0/reference/swcli/job/index.html index b4d37706b..f28699b38 100644 --- a/0.6.0/reference/swcli/job/index.html +++ b/0.6.0/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    swcli job

    Overview

    swcli [GLOBAL OPTIONS] job [OPTIONS] <SUBCOMMAND> [ARGS]...

    The job command includes the following subcommands:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [GLOBAL OPTIONS] job cancel [OPTIONS] <JOB>

    job cancel stops the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job info

    swcli [GLOBAL OPTIONS] job info [OPTIONS] <JOB>

    job info outputs detailed information about the specified Starwhale Job.

    JOB is a job URI.

    swcli job list

    swcli [GLOBAL OPTIONS] job list [OPTIONS]

    job list shows all Starwhale Jobs.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --show-removed or -srNBooleanFalseIf true, include packages that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli job pause

    swcli [GLOBAL OPTIONS] job pause [OPTIONS] <JOB>

    job pause pauses the specified job. Paused jobs can be resumed by job resume. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    From Starwhale's perspective, pause is almost the same as cancel, except that the job reuses the old Job id when resumed. It is the job developer's responsibility to save all data periodically and load them when resumed. The job id is usually used as a key of the checkpoint.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job resume

    swcli [GLOBAL OPTIONS] job resume [OPTIONS] <JOB>

    job resume resumes the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/model/index.html b/0.6.0/reference/swcli/model/index.html index b48ba37de..569520463 100644 --- a/0.6.0/reference/swcli/model/index.html +++ b/0.6.0/reference/swcli/model/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    swcli model

    Overview

    swcli [GLOBAL OPTIONS] model [OPTIONS] <SUBCOMMAND> [ARGS]...

    The model command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [GLOBAL OPTIONS] model build [OPTIONS] <WORKDIR>

    model build will put the whole WORKDIR into the model, except files that match patterns defined in .swignore.

    model build will import modules specified by --module to generate the required configurations to run the model. If your module depends on third-party libraries, we strongly recommend you use the --runtime option; otherwise, you need to ensure that the python environment used by swcli has these libraries installed.

    OptionRequiredTypeDefaultsDescription
    --project or -pNStringthe default projectthe project URI
    --model-yaml or -fNString${workdir}/model.yamlmodel yaml path, default use ${workdir}/model.yaml file. model.yaml is optional for model build.
    --module or -mNStringPython modules to be imported during the build process. Starwhale will export model handlers from these modules to the model package. This option supports set multiple times.
    --runtime or -rNStringthe URI of the Starwhale Runtime to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --name or -nNStringmodel package name
    --desc or -dNStringmodel package description
    --package-runtime--no-package-runtimeNBooleanTrueWhen using the --runtime parameter, by default, the corresponding Starwhale runtime will become the built-in runtime for the Starwhale model. This feature can be disabled with the --no-package-runtime parameter.
    --add-allNBooleanFalseAdd all files in the working directory to the model package(excludes python cache files and virtual environment files when disabled).The .swignore file still takes effect.
    -t or --tagNGlobalString

    Examples for model build

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [GLOBAL OPTIONS] model copy [OPTIONS] <SRC> <DEST>

    model copy copies from SRC to DEST for Starwhale Model sharing.

    SRC and DEST are both model URIs.

    When copying a Starwhale Model, all custom user-defined tags will be copied by default. You can use the --ignore-tag parameter to ignore certain tags. In addition, the latest and ^v\d+$ tags are Starwhale built-in tags that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the tags carried during copying have already been used by other versions, this parameter can be used to forcibly update the tags to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for model copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [GLOBAL OPTIONS] model diff [OPTIONS] <MODEL VERSION> <MODEL VERSION>

    model diff compares the difference between two versions of the same model.

    MODEL VERSION is a model URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.

    swcli model extract

    swcli [GLOBAL OPTIONS] model extract [OPTIONS] <MODEL> <TARGET_DIR>

    The model extract command can extract a Starwhale model to a specified directory for further customization.

    MODEL is a model URI.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseIf this option is used, it will forcibly overwrite existing extracted model files in the target directory.

    Examples for model extract

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [GLOBAL OPTIONS] model history [OPTIONS] <MODEL>

    model history outputs all history versions of the specified Starwhale Model.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli model info

    swcli [GLOBAL OPTIONS] model info [OPTIONS] <MODEL>

    model info outputs detailed information about the specified Starwhale Model version.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for model info

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [GLOBAL OPTIONS] model list [OPTIONS]

    model list shows all Starwhale Models.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removedNBooleanFalseIf true, include packages that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Models that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of models--filter name=mnist
    ownerKey-ValueThe model owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli model recover

    swcli [GLOBAL OPTIONS] model recover [OPTIONS] <MODEL>

    model recover recovers previously removed Starwhale Models or versions.

    MODEL is a model URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Models or versions can not be recovered, nor can those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Model or version with the same name or version id.

    swcli model remove

    swcli [GLOBAL OPTIONS] model remove [OPTIONS] <MODEL>

    model remove removes the specified Starwhale Model or version.

    MODEL is a model URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Models or versions can be recovered by swcli model recover before garbage collection. Use the --force option to persistently remove a Starwhale Model or version.

    Removed Starwhale Models or versions can be listed by swcli model list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Model or version. It can not be recovered.

    swcli model run

    swcli [GLOBAL OPTIONS] model run [OPTIONS]

    model run executes a model handler. Model run supports two modes to run: model URI and local development. Model URI mode needs a pre-built Starwhale Model Package. Local development mode only needs the model src dir.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringFor local development mode, the path of model src dir.
    --uri or -uNStringFor model URI mode, the string of model uri.
    --handler or -hNStringRunnable handler index or name, default is None, will use the first handler
    --module or -mNStringThe name of the Python module to import. This parameter can be set multiple times.
    --runtime or -rNStringthe Starwhale Runtime URI to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model run.
    --run-project or -pNStringDefault projectProject URI, indicates the model run results will be stored in the corresponding project.
    --dataset or -dNStringDataset URI, the Starwhale dataset required for model running. This parameter can be set multiple times.
    --dataset-head or -dhNInteger0[ONLY STANDALONE]For debugging purposes, every prediction task will, at most, consume the first n rows from every dataset. When the value is less than or equal to 0, all samples will be used.
    --in-containerNBooleanFalseUse docker container to run the model. This option is only available for standalone instances. For server and cloud instances, a docker image is always used. If the runtime is a docker image, this option is always implied.
    --forbid-snapshot or -fsNBooleanFalseIn model URI mode, each model run uses a new snapshot directory. Setting this parameter will directly use the model's workdir as the run directory. In local dev mode, this parameter does not take effect, each run is in the --workdir specified directory.
    -- --user-arbitrary-argsNStringSpecify the args you defined in your handlers.

    Examples for model run

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    # --> run with dataset of head 10
    swcli model run --uri mnist --dataset-head 10 --dataset mnist

    swcli model serve

    swcli [GLOBAL OPTIONS] model serve [OPTIONS]

    The model serve command can run the model as a web server, and provide a simple web interaction interface.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringIn local dev mode, specify the directory of the model code.
    --uri or -uNStringIn model URI mode, specify the model URI.
    --runtime or -rNStringThe URI of the Starwhale runtime to use when running this command. If specified, the command will run in the isolated Python environment defined in the Starwhale runtime. Otherwise it will run directly in the current Python environment of swcli.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model serve.
    --module or -mNStringName of the Python module to import. This parameter can be set multiple times.
    --hostNString127.0.0.1The address for the service to listen on.
    --portNInteger8080The port for the service to listen on.

    Examples for model serve

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [GLOBAL OPTIONS] model tag [OPTIONS] <MODEL> [TAGS]...

    model tag attaches a tag to a specified Starwhale Model version. At the same time, tag command also supports list and remove tags. The tag can be used in a model URI instead of the version id.

    MODEL is a model URI.

    Each model version can have any number of tags, but duplicated tag names are not allowed in the same model.

    model tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding tags to server/cloud instances, if the tag is already used by another model version, an error will be reported. In this case, you can force an update using the --force-add parameter.

    Examples for model tag

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/project/index.html b/0.6.0/reference/swcli/project/index.html index e04b1cf14..c2687788a 100644 --- a/0.6.0/reference/swcli/project/index.html +++ b/0.6.0/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    swcli project

    Overview

    swcli [GLOBAL OPTIONS] project [OPTIONS] <SUBCOMMAND> [ARGS]...

    The project command includes the following subcommands:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [GLOBAL OPTIONS] project create <PROJECT>

    project create creates a new project.

    PROJECT is a project URI.

    swcli project info

    swcli [GLOBAL OPTIONS] project info [OPTIONS] <PROJECT>

    project info outputs detailed information about the specified Starwhale Project.

    PROJECT is a project URI.

    swcli project list

    swcli [GLOBAL OPTIONS] project list [OPTIONS]

    project list shows all Starwhale Projects.

    OptionRequiredTypeDefaultsDescription
    --instanceNStringThe URI of the instance to list. If this option is omitted, use the default instance.
    --show-removedNBooleanFalseIf true, include projects that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli project recover

    swcli [GLOBAL OPTIONS] project recover [OPTIONS] <PROJECT>

    project recover recovers previously removed Starwhale Projects.

    PROJECT is a project URI.

    Garbage-collected Starwhale Projects can not be recovered, nor can those removed with the --force option.

    swcli project remove

    swcli [GLOBAL OPTIONS] project remove [OPTIONS] <PROJECT>

    project remove removes the specified Starwhale Project.

    PROJECT is a project URI.

    Removed Starwhale Projects can be recovered by swcli project recover before garbage collection. Use the --force option to persistently remove a Starwhale Project.

    Removed Starwhale Project can be listed by swcli project list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Project. It can not be recovered.

    swcli project use

    swcli [GLOBAL OPTIONS] project use <PROJECT>

    project use makes the specified project the default project. You must log in first to use a project on a Server/Cloud instance.

    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/runtime/index.html b/0.6.0/reference/swcli/runtime/index.html index 683b8cd34..4b162ac17 100644 --- a/0.6.0/reference/swcli/runtime/index.html +++ b/0.6.0/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    swcli runtime

    Overview

    swcli [GLOBAL OPTIONS] runtime [OPTIONS] <SUBCOMMAND> [ARGS]...

    The runtime command includes the following subcommands:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [GLOBAL OPTIONS] runtime activate [OPTIONS] <RUNTIME>

    Like source venv/bin/activate or conda activate xxx, runtime activate sets up a new python environment according to the settings of the specified runtime. When the current shell is closed or switched to another one, you need to reactivate the runtime. RUNTIME is a Runtime URI.

    If you want to quit the activated runtime environment, please run venv deactivate in the venv environment or conda deactivate in the conda environment.

    The runtime activate command will build a Python isolated environment and download relevant Python packages according to the definition of the Starwhale runtime when activating the environment for the first time. This process may spend a lot of time.

    swcli runtime build

    swcli [GLOBAL OPTIONS] runtime build [OPTIONS]

    The runtime build command can build a shareable and reproducible runtime environment suitable for ML/DL from various environments or runtime.yaml file.

    Parameters

    • Parameters related to runtime building methods:
    OptionRequiredTypeDefaultsDescription
    -c or --condaNStringFind the corresponding conda environment by conda env name, export Python dependencies to generate Starwhale runtime.
    -cp or --conda-prefixNStringFind the corresponding conda environment by conda env prefix path, export Python dependencies to generate Starwhale runtime.
    -v or --venvNStringFind the corresponding venv environment by venv directory address, export Python dependencies to generate Starwhale runtime.
    -s or --shellNStringExport Python dependencies according to current shell environment to generate Starwhale runtime.
    -y or --yamlNruntime.yaml in cwd directoryBuild Starwhale runtime according to user-defined runtime.yaml.
    -d or --dockerNStringUse the docker image as Starwhale runtime.

    The parameters for runtime building methods are mutually exclusive, only one method can be specified. If not specified, it will use --yaml method to read runtime.yaml in cwd directory to build Starwhale runtime.

    • Other parameters:
    OptionRequiredScopeTypeDefaultsDescription
    --project or -pNGlobalStringDefault projectProject URI
    -del or --disable-env-lockNruntime.yaml modeBooleanFalseWhether to install dependencies in runtime.yaml and lock the version information of related dependencies. The dependencies will be locked by default.
    -nc or --no-cacheNruntime.yaml modeBooleanFalseWhether to delete the isolated environment and install related dependencies from scratch. By default dependencies will be installed in the existing isolated environment.
    --cudaNconda/venv/shell modeChoice[11.3/11.4/11.5/11.6/11.7/]CUDA version, CUDA will not be used by default.
    --cudnnNconda/venv/shell modeChoice[8/]cuDNN version, cuDNN will not be used by default.
    --archNconda/venv/shell modeChoice[amd64/arm64/noarch]noarchArchitecture
    -dpo or --dump-pip-optionsNGlobalBooleanFalseDump pip config options from the ~/.pip/pip.conf file.
    -dcc or --dump-condarcNGlobalBooleanFalseDump conda config from the ~/.condarc file.
    -t or --tagNGlobalStringRuntime tags, the option can be used multiple times.

    Examples for Starwhale Runtime building

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [GLOBAL OPTIONS] runtime copy [OPTIONS] <SRC> <DEST>

    runtime copy copies from SRC to DEST. SRC and DEST are both Runtime URIs.

    When copying Starwhale Runtime, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are built-in Starwhale system labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for Starwhale Runtime copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [GLOBAL OPTIONS] runtime dockerize [OPTIONS] <RUNTIME>

    runtime dockerize generates a docker image based on the specified runtime. Starwhale uses docker buildx to create the image. Docker 19.03 or later is required to run this command.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --tag or -tNStringThe tag of the docker image. This option can be repeated multiple times.
    --pushNBooleanFalseIf true, push the image to the docker registry
    --platformNStringamd64The target platform, can be either amd64 or arm64. This option can be repeated multiple times to create a multi-platform image.


    swcli runtime extract

    swcli [Global Options] runtime extract [Options] <RUNTIME>

    Starwhale runtimes are distributed as compressed packages. The runtime extract command can be used to extract the runtime package for further customization and modification.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseWhether to delete and re-extract if there is already an extracted Starwhale runtime in the target directory.
    --target-dirNStringCustom extraction directory. If not specified, it will be extracted to the default Starwhale runtime workdir. The command log will show the directory location.

    swcli runtime history

    swcli [GLOBAL OPTIONS] runtime history [OPTIONS] <RUNTIME>

    runtime history outputs all history versions of the specified Starwhale Runtime.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli runtime info

    swcli [GLOBAL OPTIONS] runtime info [OPTIONS] <RUNTIME>

    runtime info outputs detailed information about a specified Starwhale Runtime version.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/runtime_yaml/manifest/lock/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for Starwhale Runtime info

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [GLOBAL OPTIONS] runtime list [OPTIONS]

    runtime list shows all Starwhale Runtimes.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include runtimes that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Runtimes that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of runtimes--filter name=pytorch
    ownerKey-ValueThe runtime owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli runtime recover

    swcli [GLOBAL OPTIONS] runtime recover [OPTIONS] <RUNTIME>

    runtime recover can recover previously removed Starwhale Runtimes or versions.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Runtimes or versions can not be recovered, nor can those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Runtime or version with the same name or version id.

    swcli runtime remove

    swcli [GLOBAL OPTIONS] runtime remove [OPTIONS] <RUNTIME>

    runtime remove removes the specified Starwhale Runtime or version.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Runtimes or versions can be recovered by swcli runtime recover before garbage collection. Use the --force option to persistently remove a Starwhale Runtime or version.

    Removed Starwhale Runtimes or versions can be listed by swcli runtime list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Runtime or version. It can not be recovered.

    swcli runtime tag

    swcli [GLOBAL OPTIONS] runtime tag [OPTIONS] <RUNTIME> [TAGS]...

    runtime tag attaches a tag to a specified Starwhale Runtime version. At the same time, tag command also supports list and remove tags. The tag can be used in a runtime URI instead of the version id.

    RUNTIME is a Runtime URI.

    Each runtime version can have any number of tags, but duplicated tag names are not allowed in the same runtime.

    runtime tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseRemove the tag if true
    --quiet or -qNBooleanFalseIgnore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another runtime version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for runtime tag

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/0.6.0/reference/swcli/utilities/index.html b/0.6.0/reference/swcli/utilities/index.html index 8fc9ea6af..0341f503f 100644 --- a/0.6.0/reference/swcli/utilities/index.html +++ b/0.6.0/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Utility Commands

    swcli gc

    swcli [GLOBAL OPTIONS] gc [OPTIONS]

    gc clears removed projects, models, datasets, and runtimes according to the internal garbage collection policy.

    OptionRequiredTypeDefaultsDescription
    --dry-runNBooleanFalseIf true, outputs objects to be removed instead of clearing them.
    --yesNBooleanFalseBypass confirmation prompts.

    swcli check

    swcli [GLOBAL OPTIONS] check

    Check if the external dependencies of the swcli command meet the requirements. Currently mainly checks Docker and Conda.

    swcli completion install

    swcli [GLOBAL OPTIONS] completion install <SHELL_NAME>

    Install autocompletion for swcli commands. Currently supports bash, zsh and fish. If SHELL_NAME is not specified, it will try to automatically detect the current shell type.

    swcli config edit

    swcli [GLOBAL OPTIONS] config edit

    Edit the Starwhale configuration file at ~/.config/starwhale/config.yaml.

    swcli ui

    swcli [GLOBAL OPTIONS] ui <INSTANCE>

    Open the web page for the corresponding instance.

    - + \ No newline at end of file diff --git a/0.6.0/runtime/index.html b/0.6.0/runtime/index.html index fbe17495a..6a672f01f 100644 --- a/0.6.0/runtime/index.html +++ b/0.6.0/runtime/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Runtime

    overview

    Overview

    Starwhale Runtime aims to provide a reproducible and sharable running environment for python programs. You can easily share your working environment with your teammates or outsiders, and vice versa. Furthermore, you can run your programs on Starwhale Server or Starwhale Cloud without bothering with the dependencies.

    Starwhale works well with virtualenv, conda, and docker. If you are using one of them, it is straightforward to create a Starwhale Runtime based on your current environment.

    Multiple Starwhale Runtimes on your local machine can be switched freely by one command. You can work on different projects without messing up the environment. Starwhale Runtime consists of two parts: the base image and the dependencies.

    The base image

    The base is a docker image with Python, CUDA, and cuDNN installed. Starwhale provides various base images for you to choose from; see the following list:

    • Computer system architecture:
      • X86 (amd64)
      • Arm (aarch64)
    • Operating system:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7
    - + \ No newline at end of file diff --git a/0.6.0/runtime/yaml/index.html b/0.6.0/runtime/yaml/index.html index 26942d2fc..ec58436ab 100644 --- a/0.6.0/runtime/yaml/index.html +++ b/0.6.0/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    The runtime.yaml Specification

    runtime.yaml is the configuration file that defines the properties of the Starwhale Runtime. runtime.yaml is required for the yaml mode of the swcli runtime build command.

    Examples

    The simplest example

    dependencies:
    - pip:
    - numpy
    name: simple-test

    Define a Starwhale Runtime that uses venv as the Python virtual environment for package isolation, and installs the numpy dependency.

    The llama2 example

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    The full definition example

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be install with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/0.6.0/server/guides/server_admin/index.html b/0.6.0/server/guides/server_admin/index.html index 79e7f03cf..18d7db67d 100644 --- a/0.6.0/server/guides/server_admin/index.html +++ b/0.6.0/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    Skip to main content
    Version: 0.6.0

    Controller Admin Settings

    Superuser Password Reset

    In case you forget the superuser's password, you could use the SQL below to reset the password to abcd1234

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    After that, you could login to the console and then change the password to what you really want.

    System Settings

    You can customize the system to make it easier to use by leveraging the system settings. Here is an example below:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    Image Registry

    Tasks dispatched by the server are based on docker images. Pulling these images could be slow if your internet is not working well. Starwhale Server supports the custom image registries, includes dockerSetting.registryForPush and dockerSetting.registryForPull.

    Resource Pool

    The resourcePoolSetting allows you to manage your cluster in a group manner. It is currently implemented by K8S nodeSelector, you could label your machines in K8S cluster and make them a resourcePool in Starwhale.

    Remote Storage

    The storageSetting allows you to manage the storages the server could access.

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload

    Every storageSetting item has a corresponding implementation of the StorageAccessService interface. Starwhale has four built-in implementations:

    • StorageAccessServiceAliyun matches type in (aliyun,oss)
    • StorageAccessServiceMinio matches type in (minio)
    • StorageAccessServiceS3 matches type in (s3)
    • StorageAccessServiceFile matches type in (fs, file)

    Each of the implementations has different requirements for tokens. endpoint is required when type in (aliyun,minio), region is required when type is s3 and endpoint is empty. The fs/file type requires tokens to contain rootDir and serviceProvider entries. Please refer to the code for more details.

    - + \ No newline at end of file diff --git a/0.6.0/server/index.html b/0.6.0/server/index.html index ad7685966..c09f3ce41 100644 --- a/0.6.0/server/index.html +++ b/0.6.0/server/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/server/installation/docker-compose/index.html b/0.6.0/server/installation/docker-compose/index.html index 65160ebfa..c8aa8756d 100644 --- a/0.6.0/server/installation/docker-compose/index.html +++ b/0.6.0/server/installation/docker-compose/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Install Starwhale Server with Docker Compose

    Prerequisites

    Usage

    Start up the server

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    The GLOBAL_IP is the IP address of the Controller, which should be accessible by all swcli clients, both inside docker containers and on other user machines.

    compose.yaml contains Starwhale Controller/MySQL/MinIO services. Touch a compose.override.yaml, as its name implies, can contain configuration overrides for compose.yaml. The available configurations are specified here

    - + \ No newline at end of file diff --git a/0.6.0/server/installation/docker/index.html b/0.6.0/server/installation/docker/index.html index c2f7d0b50..4ed2cea7e 100644 --- a/0.6.0/server/installation/docker/index.html +++ b/0.6.0/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Install Starwhale Server with Docker

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage to save datasets, models, and others.

    Please make sure pods on the Kubernetes cluster can access the port exposed by the Starwhale Server installation.

    Prepare an env file for Docker

    Starwhale Server can be configured by environment variables.

    An env file template for Docker is here. You may create your own env file by modifying the template.

    Prepare a kubeconfig file [Optional][SW_SCHEDULER=k8s]

    The kubeconfig file is used for accessing the Kubernetes cluster. For more information about kubeconfig files, see the Official Kubernetes Documentation.

    If you have a local kubectl command-line tool installed, you can run kubectl config view to see your current configuration.

    Run the Docker image

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<path to your kubeconfig file>,destination=/root/.kube/config,readonly \
    --env-file <path to your env file> \
    ghcr.io/star-whale/server:0.5.6

    For users in the mainland of China, use docker image: docker-registry.starwhale.cn/star-whale/server.

    - + \ No newline at end of file diff --git a/0.6.0/server/installation/helm-charts/index.html b/0.6.0/server/installation/helm-charts/index.html index 42e300bae..9903646f0 100644 --- a/0.6.0/server/installation/helm-charts/index.html +++ b/0.6.0/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Install Starwhale Server with Helm

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage system to save datasets, models, and others.
    • Helm 3.2.0+.

    The Starwhale Helm Charts includes MySQL and MinIO as dependencies. If you do not have your own MySQL instance or any S3-compatible object storage available, use the Helm Charts to install. Please check Installation Options to learn how to install Starwhale Server with MySQL and MinIO.

    Create a service account on Kubernetes for Starwhale Server

    If Kubernetes RBAC is enabled (In Kubernetes 1.6+, RBAC is enabled by default), Starwhale Server can not work properly unless it is started by a service account with at least the following permissions:

    ResourceAPI GroupGetListWatchCreateDelete
    jobsbatchYYYYY
    podscoreYYY
    nodescoreYYY
    events""Y

    Example:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    Downloading Starwhale Helm Charts

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    Installing Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    If you have a local kubectl command-line tool installed, you can run kubectl get pods -n starwhale to check if all pods are running.

    Updating Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    Uninstalling Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/0.6.0/server/installation/index.html b/0.6.0/server/installation/index.html index 17fec3c8a..48049620f 100644 --- a/0.6.0/server/installation/index.html +++ b/0.6.0/server/installation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    - + \ No newline at end of file diff --git a/0.6.0/server/installation/minikube/index.html b/0.6.0/server/installation/minikube/index.html index b9b76e491..845a0f3b7 100644 --- a/0.6.0/server/installation/minikube/index.html +++ b/0.6.0/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Install Starwhale Server with Minikube

    Prerequisites

    Starting Minikube

    minikube start --addons ingress --kubernetes-version=1.25.3

    For users in the mainland of China, please add --image-mirror-country=cn parameter. If there is no kubectl bin in your machine, you may use minikube kubectl or alias kubectl="minikube kubectl --" alias command.

    Installing Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    For users in the mainland of China, use values.minikube.cn.yaml:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    After the installation is successful, the following prompt message appears:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    Checking Starwhale Server status

    Keep checking the minikube service status until all deployments are running (waiting for 3~5 mins):

    kubectl get deployments -n starwhale
    NAMEREADYUP-TO-DATEAVAILABLEAGE
    controller1/1115m
    minio1/1115m
    mysql1/1115m

    Visiting for local

    Make the Starwhale controller accessible locally with the following command:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    Then you can visit http://controller.starwhale.svc in your local web browser.

    Visiting for others

    • Step 1: in the Starwhale Server machine

      for temporary use with socat command:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      When you kill the socat process, the share access will be blocked. iptables maybe a better choice for long-term use.

    • Step 2: in the other machines

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/0.6.0/server/installation/starwhale_env/index.html b/0.6.0/server/installation/starwhale_env/index.html index d989967ef..ba3449a39 100644 --- a/0.6.0/server/installation/starwhale_env/index.html +++ b/0.6.0/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Server Environment Example

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################

    # The cache directory for the WAL files. Point it to a mounted volume or host path with enough space.
    # If not set, the WAL files will be saved in the docker runtime layer, and will be lost when the container is restarted.
    SW_DATASTORE_WAL_LOCAL_CACHE_DIR=
    - + \ No newline at end of file diff --git a/0.6.0/server/project/index.html b/0.6.0/server/project/index.html index 8a5936b60..fba2771cb 100644 --- a/0.6.0/server/project/index.html +++ b/0.6.0/server/project/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    How to Organize and Manage Resources with Starwhale Projects

    Project is the basic unit for organizing and managing resources (such as models, datasets, runtime environments, etc.). You can create and manage projects based on your needs. For example, you can create projects by business team, product line, or models. One user can create and participate in one or more projects.

    Project type

    There are two types of projects:

    • Private project: The project (and related resources in the project) is only visible to project members with permission. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    • Public project: The project (and related resources in the project) is visible to all Starwhale users. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    Create a project

    1. Click the Create button in the upper right corner of the project list page;
    2. Enter a name for the project. Pay attention to avoiding duplicate names. For more information, please see Names in Starwhale
    3. Select the Project Type, which is defaulted to private project and can be selected as public according to needs;
    4. Fill in the description content;
    5. To finish, Click the Submit button.

    Edit a project

    The name, privacy and description of a project can be edited.

    1. Go to the project list page and find the project that needs to be edited by searching for the project name, then click the Edit Project button;
    2. Edit the items that need to be edited;
    3. Click Submit to save the edited content;
    4. If you're editing multiple projects, repeat steps 1 through 3.

    View a project

    My projects

    On the project list page, only my projects are displayed by default. My projects refer to the projects participated in by the current users as project members or project owners.

    Project sorting

    On the project list page, all projects are supported to be sorted by "Recently visited", "Project creation time from new to old", and "Project creation time from old to new", which can be selected according to your needs.

    Delete a project

    Once a project is deleted, all related resources (such as datasets, models, runtimes, evaluations, etc.) will be deleted and cannot be restored.

    1. Enter the project list page and search for the project name to find the project that needs to be deleted. Hover your mouse over the project you want to delete, then click the Delete button;
    2. Follow the prompts, enter the relevant information, click Confirm to delete the project, or click Cancel to cancel the deletion;
    3. If you are deleting multiple projects, repeat the above steps.

    Manage project member

    Only users with the admin role can assign people to the project. The project owner defaulted to having the project owner role.

    Add a member

    1. Click Manage Members to go to the project member list page;
    2. Click the Add Member button in the upper right corner.
    3. Enter the Username you want to add, select a project role for the user in the project.
    4. Click submit to complete.
    5. If you're adding multiple members, repeat steps 1 through 4.

    Remove a member

    1. On the project list page or project overview tab, click Manage Members to go to the project member list page.
    2. Search for the username you want to delete, then click the Delete button.
    3. Click Yes to delete the user from this project, click No to cancel the deletion.
    4. If you're removing multiple members, repeat steps 1 through 3.

    Edit a member's role

    1. Hover your mouse over the project you want to edit, then click Manage Members to go to the project member list page.
    2. Find the username you want to adjust through searching, click the Project Role drop-down menu, and select a new project role. For more information on roles, please take a look at Roles and permissions in Starwhale.
    - + \ No newline at end of file diff --git a/0.6.0/swcli/config/index.html b/0.6.0/swcli/config/index.html index 48240b53b..8b88cc27f 100644 --- a/0.6.0/swcli/config/index.html +++ b/0.6.0/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Configuration

    Standalone Instance is installed on the user's laptop or development server, providing isolation at the level of Linux/macOS users. Users can install the Starwhale Python package using the pip command and execute any swcli command. After that, they can view their Starwhale configuration in ~/.config/starwhale/config.yaml. In the vast majority of cases, users do not need to manually modify the config.yaml file.

    The ~/.config/starwhale/config.yaml file has permissions set to 0o600 to ensure security, as it contains sensitive information such as encryption keys. Users are advised not to change the file permissions. You could customize your swcli by swcli config edit:

    swcli config edit

    config.yaml example

    The typical config.yaml file is as follows:

    • The default instance is local.
    • cloud-cn/cloud-k8s/pre-k8s are the server/cloud instances, local is the standalone instance.
    • The local storage root directory for the Standalone Instance is set to /home/liutianwei/.starwhale.
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml definition

    ParameterDescriptionTypeDefault ValueRequired
    current_instanceThe name of the default instance to use. It is usually set using the swcli instance select command.StringselfYes
    instancesManaged instances, including Standalone, Server and Cloud Instances. There must be at least one Standalone Instance named "local" and one or more Server/Cloud Instances. You can log in to a new instance with swcli instance login and log out from an instance with swcli instance logout.DictStandalone Instance named "local"Yes
    instances.{instance-alias-name}.sw_tokenLogin token for Server/Cloud Instances. It is only effective for Server/Cloud Instances. Subsequent swcli operations on Server/Cloud Instances will use this token. Note that tokens have an expiration time, typically set to one month, which can be configured within the Server/Cloud Instance.StringCloud - Yes, Standalone - No
    instances.{instance-alias-name}.typeType of the instance, currently can only be "cloud" or "standalone".Choice[string]Yes
    instances.{instance-alias-name}.uriFor Server/Cloud Instances, the URI is an http/https address. For Standalone Instances, the URI is set to "local".StringYes
    instances.{instance-alias-name}.user_nameUser's nameStringYes
    instances.{instance-alias-name}.current_projectDefault Project under the current instance. It will be used to fill the "project" field in the URI representation by default. You can set it using the swcli project select command.StringYes
    instances.{instance-alias-name}.user_roleUser's role.StringnormalYes
    instances.{instance-alias-name}.updated_atThe last updated time for this instance configuration.Time format stringYes
    storageSettings related to local storage.DictYes
    storage.rootThe root directory for Standalone Instance's local storage. Typically, if there is insufficient space in the home directory and you manually move data files to another location, you can modify this field.String~/.starwhaleYes
    versionThe version of config.yaml, currently only supports 2.0.String2.0Yes

    You could put starwhale.Link to your assets while the URI in the Link could be whatever(only s3 like or http is implemented) you need, such as s3://10.131.0.1:9000/users/path. However, Links may need to be authed, you could config the auth info in link_auths.

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    Items in link_auths will match the uri in Links automatically. s3 typed link_auth matching Links by looking up bucket and endpoint.

    - + \ No newline at end of file diff --git a/0.6.0/swcli/index.html b/0.6.0/swcli/index.html index 1b30246b4..f82a5c68c 100644 --- a/0.6.0/swcli/index.html +++ b/0.6.0/swcli/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Client (swcli) User Guide

    The Starwhale Client (swcli) is a command-line tool that enables you to interact with Starwhale instances. You can use swcli to complete almost all tasks in Starwhale. swcli is written in pure python3 (requires Python 3.7 ~ 3.11) so that it can be easily installed by the pip command. Currently, swcli only supports Linux and macOS, Windows is coming soon.

    - + \ No newline at end of file diff --git a/0.6.0/swcli/installation/index.html b/0.6.0/swcli/installation/index.html index 8f67e2732..ad71504e2 100644 --- a/0.6.0/swcli/installation/index.html +++ b/0.6.0/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Installation Guide

    We can use swcli to complete all tasks for Starwhale Instances. swcli is written by pure python3, which can be installed easily by the pip command. Here are some installation tips that can help you get a cleaner, unambiguous, no dependency conflicts swcli python environment.

    Installing Advice

    DO NOT install Starwhale in your system's global Python environment. It will cause a python dependency conflict problem.

    Prerequisites

    • Python 3.7 ~ 3.11
    • Linux or macOS
    • Conda (optional)

    In the Ubuntu system, you can run the following commands:

    sudo apt-get install python3 python3-venv python3-pip

    #If you want to install multi python versions
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli works on macOS. If you run into issues with the default system Python3 on macOS, try installing Python3 through the homebrew:

    brew install python3

    Install swcli

    Install with venv

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    Install with conda

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    👏 Now, you can use swcli in the global environment.

    Install for the special scenarios

    # for Audio processing
    python -m pip install starwhale[audio]

    # for Image processing
    python -m pip install starwhale[pillow]

    # for swcli model server command
    python -m pip install starwhale[server]

    # for built-in online serving
    python -m pip install starwhale[online-serve]

    # install all dependencies
    python -m pip install starwhale[all]

    Update swcli

    #for venv
    python3 -m pip install --upgrade starwhale

    #for conda
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    Uninstall swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/0.6.0/swcli/swignore/index.html b/0.6.0/swcli/swignore/index.html index 83ec0022f..0d252fd0d 100644 --- a/0.6.0/swcli/swignore/index.html +++ b/0.6.0/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    About the .swignore file

    The .swignore file is similar to .gitignore, .dockerignore, and other files used to define ignored files or dirs. The .swignore files mainly used in the Starwhale Model building process. By default, the swcli model build command or starwhale.model.build() Python SDK will traverse all files in the specified directory and automatically exclude certain known files or directories that are not suitable for inclusion in the model package.

    PATTERN FORMAT

    • Each line in a swignore file specifies a pattern, which matches files and directories.
    • A blank line matches no files, so it can serve as a separator for readability.
    • An asterisk * matches anything except a slash.
    • A line starting with # serves as a comment.
    • Supports wildcard expressions, for example: *.jpg, *.png.

    Auto Ignored files or dirs

    If you want to include the auto ignored files or dirs, you can add --add-all for swcli model build command.

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv installation dir
    • conda installation dir

    Example

    Here is the .swignore file used in the MNIST example:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/0.6.0/swcli/uri/index.html b/0.6.0/swcli/uri/index.html index f813f914c..3e5e67b91 100644 --- a/0.6.0/swcli/uri/index.html +++ b/0.6.0/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    Skip to main content
    Version: 0.6.0

    Starwhale Resources URI

    tip

    Resource URI is widely used in Starwhale client commands. The URI can refer to a resource in the local instance or any other resource in a remote instance. In this way, the Starwhale client can easily manipulate any resource.

    concepts-org.jpg

    Instance URI

    Instance URI can be either:

    • local: standalone instance.
    • [http(s)://]<hostname or ip>[:<port>]: cloud instance with HTTP address.
    • [cloud://]<cloud alias>: cloud or server instance with an alias name, which can be configured in the instance login phase.
    caution

    "local" is different from "localhost". The former means the local standalone instance without a controller, while the latter implies a controller listening at the default port 8082 on the localhost.

    Example:

    # log in Starwhale Cloud; the alias is swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.ai --alias swcloud

    # copy a model from the local instance to the cloud instance
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # copy a runtime to a Starwhale Server instance: http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    Project URI

    Project URI is in the format [<Instance URI>/project/]<project name>. If the instance URI is not specified, use the current instance instead.

    Example:

    swcli project select self   # select the self project in the current instance
    swcli project info local/project/self # inspect self project info in the local instance

    Model/Dataset/Runtime URI

    • Model URI: [<Project URI>/model/]<model name>[/version/<version id|tag>].
    • Dataset URI: [<Project URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • Runtime URI: [<Project URI>/runtime/]<runtime name>[/version/<version id|tag>].
    tip
    • swcli supports human-friendly short version id. You can type the first few characters of the version id, provided it is at least four characters long and unambiguous. However, the recover command must use the complete version id.
    • If the project URI is not specified, the default project will be used.
    • You can always use the version tag instead of the version id.

    Example:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # inspect model info, model name: mnist, version:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # short version
    swcli model info mnist # inspect mnist model info
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # use the default latest tag

    Job URI

    • format: [<Project URI>/job/]<job id>.
    • If the project URI is not specified, the default project will be used.

    Example:

    swcli job info mezdayjzge3w   # Inspect mezdayjzge3w version in default instance and default project
    swcli job info local/project/self/job/mezday # Inspect the local instance, self project, with short job id:mezday

    The default instance

    When the instance part of a project URI is omitted, the default instance is used instead. The default instance is the one selected by the swcli instance login or swcli instance use command.

    The default project

    When the project parts of Model/Dataset/Runtime/Evaluation URIs are omitted, the default project is used instead. The default project is the one selected by the swcli project use command.

    - + \ No newline at end of file diff --git a/404.html b/404.html index c3a3e9280..dfb971181 100644 --- a/404.html +++ b/404.html @@ -10,13 +10,13 @@ - +
    Skip to main content

    Page Not Found

    We could not find what you were looking for.

    Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

    - + \ No newline at end of file diff --git a/assets/js/2d78f039.7aad882f.js b/assets/js/2d78f039.7193711f.js similarity index 52% rename from assets/js/2d78f039.7aad882f.js rename to assets/js/2d78f039.7193711f.js index d12da8128..b373fb1f7 100644 --- a/assets/js/2d78f039.7aad882f.js +++ b/assets/js/2d78f039.7193711f.js @@ -1 +1 @@ -"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[9255],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>p});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var c=a.createContext({}),u=function(e){var t=a.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=u(e.components);return a.createElement(c.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,c=e.parentName,s=i(e,["components","mdxType","originalType","parentName"]),h=u(n),p=r,m=h["".concat(c,".").concat(p)]||h[p]||d[p]||o;return n?a.createElement(m,l(l({ref:t},s),{},{components:n})):a.createElement(m,l({ref:t},s))}));function p(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,l=new Array(o);l[0]=h;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i.mdxType="string"==typeof e?e:r,l[1]=i;for(var 
u=2;u{n.r(t),n.d(t,{assets:()=>c,contentTitle:()=>l,default:()=>d,frontMatter:()=>o,metadata:()=>i,toc:()=>u});var a=n(3117),r=(n(7294),n(3905));const o={title:"Getting started with Starwhale Cloud"},l=void 0,i={unversionedId:"getting-started/cloud",id:"getting-started/cloud",title:"Getting started with Starwhale Cloud",description:"Starwhale Cloud is hosted on Aliyun with the domain name . In the futher, we will launch the service on AWS with the domain name . It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.",source:"@site/docs/getting-started/cloud.md",sourceDirName:"getting-started",slug:"/getting-started/cloud",permalink:"/next/getting-started/cloud",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/cloud.md",tags:[],version:"current",frontMatter:{title:"Getting started with Starwhale Cloud"},sidebar:"mainSidebar",previous:{title:"Getting started with Starwhale Server",permalink:"/next/getting-started/server"},next:{title:"Getting Started with Starwhale Runtime",permalink:"/next/getting-started/runtime"}},c={},u=[{value:"Sign Up for Starwhale Cloud and create your first project",id:"sign-up-for-starwhale-cloud-and-create-your-first-project",level:2},{value:"Build the dataset, model, and runtime on your local machine",id:"build-the-dataset-model-and-runtime-on-your-local-machine",level:2},{value:"Login to the cloud instance",id:"login-to-the-cloud-instance",level:2},{value:"Copy the dataset, model, and runtime to the cloud instance",id:"copy-the-dataset-model-and-runtime-to-the-cloud-instance",level:2},{value:"Run an evaluation with the web UI",id:"run-an-evaluation-with-the-web-ui",level:2}],s={toc:u};function d(e){let{components:t,...n}=e;return(0,r.kt)("wrapper",(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("p",null,"Starwhale Cloud is hosted on Aliyun with the domain name 
",(0,r.kt)("a",{parentName:"p",href:"https://cloud.starwhale.cn"},"https://cloud.starwhale.cn"),". In the futher, we will launch the service on AWS with the domain name ",(0,r.kt)("a",{parentName:"p",href:"https://cloud.starwhale.ai"},"https://cloud.starwhale.ai"),". It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started."),(0,r.kt)("p",null,"You need to install the ",(0,r.kt)("a",{parentName:"p",href:"../swcli"},"Starwhale Client (swcli)")," at first."),(0,r.kt)("h2",{id:"sign-up-for-starwhale-cloud-and-create-your-first-project"},"Sign Up for Starwhale Cloud and create your first project"),(0,r.kt)("p",null,"You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account."),(0,r.kt)("p",null,"Then you can create a new project. In this tutorial, we will use the name ",(0,r.kt)("inlineCode",{parentName:"p"},"demo")," for the project name."),(0,r.kt)("h2",{id:"build-the-dataset-model-and-runtime-on-your-local-machine"},"Build the dataset, model, and runtime on your local machine"),(0,r.kt)("p",null,"Follow step 1 to step 4 in ",(0,r.kt)("a",{parentName:"p",href:"standalone"},"Getting started with Starwhale Standalone")," to create:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"a Starwhale model named mnist"),(0,r.kt)("li",{parentName:"ul"},"a Starwhale dataset named mnist"),(0,r.kt)("li",{parentName:"ul"},"a Starwhale runtime named pytorch")),(0,r.kt)("h2",{id:"login-to-the-cloud-instance"},"Login to the cloud instance"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username --password --alias swcloud https://cloud.starwhale.cn\n")),(0,r.kt)("h2",{id:"copy-the-dataset-model-and-runtime-to-the-cloud-instance"},"Copy the dataset, model, and runtime to the cloud 
instance"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model copy mnist swcloud/project/:demo\nswcli dataset copy mnist swcloud/project/:demo\nswcli runtime copy pytorch swcloud/project/:demo\n")),(0,r.kt)("h2",{id:"run-an-evaluation-with-the-web-ui"},"Run an evaluation with the web UI"),(0,r.kt)("p",null,(0,r.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Congratulations! You have completed the Starwhale Cloud Getting Started Guide.")))}d.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[9255],{3905:(e,t,a)=>{a.d(t,{Zo:()=>s,kt:()=>p});var n=a(7294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var c=n.createContext({}),u=function(e){var t=n.useContext(c),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},s=function(e){var t=u(e.components);return n.createElement(c.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},h=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,c=e.parentName,s=i(e,["components","mdxType","originalType","parentName"]),h=u(a),p=r,m=h["".concat(c,".").concat(p)]||h[p]||d[p]||o;return 
a?n.createElement(m,l(l({ref:t},s),{},{components:a})):n.createElement(m,l({ref:t},s))}));function p(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,l=new Array(o);l[0]=h;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i.mdxType="string"==typeof e?e:r,l[1]=i;for(var u=2;u{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>l,default:()=>d,frontMatter:()=>o,metadata:()=>i,toc:()=>u});var n=a(3117),r=(a(7294),a(3905));const o={title:"Getting started with Starwhale Cloud"},l=void 0,i={unversionedId:"getting-started/cloud",id:"getting-started/cloud",title:"Getting started with Starwhale Cloud",description:"Starwhale Cloud is hosted on Aliyun with the domain name . In the futher, we will launch the service on AWS with the domain name . It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.",source:"@site/docs/getting-started/cloud.md",sourceDirName:"getting-started",slug:"/getting-started/cloud",permalink:"/next/getting-started/cloud",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/cloud.md",tags:[],version:"current",frontMatter:{title:"Getting started with Starwhale Cloud"},sidebar:"mainSidebar",previous:{title:"Getting started with Starwhale Server",permalink:"/next/getting-started/server"},next:{title:"Getting Started with Starwhale Runtime",permalink:"/next/getting-started/runtime"}},c={},u=[{value:"Sign Up for Starwhale Cloud and create your first project",id:"sign-up-for-starwhale-cloud-and-create-your-first-project",level:2},{value:"Build the dataset, model, and runtime on your local machine",id:"build-the-dataset-model-and-runtime-on-your-local-machine",level:2},{value:"Login to the cloud instance",id:"login-to-the-cloud-instance",level:2},{value:"Copy the dataset, model, and runtime to the cloud 
instance",id:"copy-the-dataset-model-and-runtime-to-the-cloud-instance",level:2},{value:"Run an evaluation with the web UI",id:"run-an-evaluation-with-the-web-ui",level:2}],s={toc:u};function d(e){let{components:t,...a}=e;return(0,r.kt)("wrapper",(0,n.Z)({},s,a,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("p",null,"Starwhale Cloud is hosted on Aliyun with the domain name ",(0,r.kt)("a",{parentName:"p",href:"https://cloud.starwhale.cn"},"https://cloud.starwhale.cn"),". In the futher, we will launch the service on AWS with the domain name ",(0,r.kt)("a",{parentName:"p",href:"https://cloud.starwhale.ai"},"https://cloud.starwhale.ai"),". It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started."),(0,r.kt)("p",null,"You need to install the ",(0,r.kt)("a",{parentName:"p",href:"../swcli"},"Starwhale Client (swcli)")," at first."),(0,r.kt)("h2",{id:"sign-up-for-starwhale-cloud-and-create-your-first-project"},"Sign Up for Starwhale Cloud and create your first project"),(0,r.kt)("p",null,"You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account."),(0,r.kt)("p",null,"Then you can create a new project. 
In this tutorial, we will use the name ",(0,r.kt)("inlineCode",{parentName:"p"},"demo")," for the project name."),(0,r.kt)("h2",{id:"build-the-dataset-model-and-runtime-on-your-local-machine"},"Build the dataset, model, and runtime on your local machine"),(0,r.kt)("p",null,"Follow step 1 to step 4 in ",(0,r.kt)("a",{parentName:"p",href:"standalone"},"Getting started with Starwhale Standalone")," to create:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"a Starwhale model named helloworld"),(0,r.kt)("li",{parentName:"ul"},"a Starwhale dataset named mnist64"),(0,r.kt)("li",{parentName:"ul"},"a Starwhale runtime named helloworld")),(0,r.kt)("h2",{id:"login-to-the-cloud-instance"},"Login to the cloud instance"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username --password --alias swcloud https://cloud.starwhale.cn\n")),(0,r.kt)("h2",{id:"copy-the-dataset-model-and-runtime-to-the-cloud-instance"},"Copy the dataset, model, and runtime to the cloud instance"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model copy helloworld swcloud/project/:demo\nswcli dataset copy mnist64 swcloud/project/:demo\nswcli runtime copy helloworld swcloud/project/:demo\n")),(0,r.kt)("h2",{id:"run-an-evaluation-with-the-web-ui"},"Run an evaluation with the web UI"),(0,r.kt)("p",null,(0,r.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Congratulations! 
You have completed the Starwhale Cloud Getting Started Guide.")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/42d9f35f.71c9e63a.js b/assets/js/42d9f35f.71c9e63a.js deleted file mode 100644 index 98e7bab87..000000000 --- a/assets/js/42d9f35f.71c9e63a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[6435],{3905:(e,t,a)=>{a.d(t,{Zo:()=>m,kt:()=>u});var n=a(7294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var p=n.createContext({}),s=function(e){var t=n.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},m=function(e){var t=s(e.components);return n.createElement(p.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},c=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,m=o(e,["components","mdxType","originalType","parentName"]),c=s(a),u=r,h=c["".concat(p,".").concat(u)]||c[u]||d[u]||l;return a?n.createElement(h,i(i({ref:t},m),{},{components:a})):n.createElement(h,i({ref:t},m))}));function u(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=c;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o.mdxType="string"==typeof e?e:r,i[1]=o;for(var s=2;s{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>d,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var 
n=a(3117),r=(a(7294),a(3905));const l={title:"Getting started with Starwhale Standalone"},i=void 0,o={unversionedId:"getting-started/standalone",id:"getting-started/standalone",title:"Getting started with Starwhale Standalone",description:"When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.",source:"@site/docs/getting-started/standalone.md",sourceDirName:"getting-started",slug:"/getting-started/standalone",permalink:"/next/getting-started/standalone",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/standalone.md",tags:[],version:"current",frontMatter:{title:"Getting started with Starwhale Standalone"},sidebar:"mainSidebar",previous:{title:"Getting started",permalink:"/next/getting-started/"},next:{title:"Getting started with Starwhale Server",permalink:"/next/getting-started/server"}},p={},s=[{value:"Downloading Examples",id:"downloading-examples",level:2},{value:"Building a Pytorch Runtime",id:"building-a-pytorch-runtime",level:2},{value:"Building a Model",id:"building-a-model",level:2},{value:"Building a Dataset",id:"building-a-dataset",level:2},{value:"Running an Evaluation Job",id:"running-an-evaluation-job",level:2}],m={toc:s};function d(e){let{components:t,...a}=e;return(0,r.kt)("wrapper",(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("p",null,"When the ",(0,r.kt)("a",{parentName:"p",href:"../swcli/"},"Starwhale Client (swcli)")," is installed, you are ready to use Starwhale Standalone."),(0,r.kt)("p",null,"We also provide a Jupyter Notebook example, you can try it in ",(0,r.kt)("a",{parentName:"p",href:"https://colab.research.google.com/github/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"Google Colab")," or in your local ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"vscode/jupyterlab"),"."),(0,r.kt)("h2",{id:"downloading-examples"},"Downloading 
Examples"),(0,r.kt)("p",null,"Download Starwhale examples by cloning the Starwhale project via:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1\ncd starwhale\n")),(0,r.kt)("p",null,"To save time in the example downloading, we skip git-lfs and other commits info. We will use ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory."),(0,r.kt)("p",null,(0,r.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/standalone-core-workflow.gif",alt:"Core Workflow"})),(0,r.kt)("h2",{id:"building-a-pytorch-runtime"},"Building a Pytorch Runtime"),(0,r.kt)("p",null,"Runtime example codes are in the ",(0,r.kt)("inlineCode",{parentName:"p"},"example/runtime/pytorch")," directory."),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Build the Starwhale runtime bundle:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime build --yaml example/runtime/pytorch/runtime.yaml\n")),(0,r.kt)("admonition",{parentName:"li",type:"tip"},(0,r.kt)("p",{parentName:"admonition"},"When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. 
Using the befitting pypi mirror and cache config in the ",(0,r.kt)("inlineCode",{parentName:"p"},"~/.pip/pip.conf")," file is a recommended practice."),(0,r.kt)("p",{parentName:"admonition"},"For users in the mainland of China, the following conf file is an option:"),(0,r.kt)("pre",{parentName:"admonition"},(0,r.kt)("code",{parentName:"pre",className:"language-conf"},"[global]\ncache-dir = ~/.cache/pip\nindex-url = https://pypi.tuna.tsinghua.edu.cn/simple\nextra-index-url = https://mirrors.aliyun.com/pypi/simple/\n")))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Check your local Starwhale Runtime:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime list\nswcli runtime info pytorch\n")))),(0,r.kt)("h2",{id:"building-a-model"},"Building a Model"),(0,r.kt)("p",null,"Model example codes are in the ",(0,r.kt)("inlineCode",{parentName:"p"},"example/mnist")," directory."),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Download the pre-trained model file:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"cd example/mnist\nmake download-model\n# For users in the mainland of China, please add `CN=1` environment for make command:\n# CN=1 make download-model\ncd -\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Build a Starwhale model:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model build example/mnist --runtime pytorch\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Check your local Starwhale models:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model list\nswcli model info mnist\n")))),(0,r.kt)("h2",{id:"building-a-dataset"},"Building a Dataset"),(0,r.kt)("p",null,"Dataset example codes are in the ",(0,r.kt)("inlineCode",{parentName:"p"},"example/mnist")," 
directory."),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Download the MNIST raw data:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"cd example/mnist\nmake download-data\n# For users in the mainland of China, please add `CN=1` environment for make command:\n# CN=1 make download-data\ncd -\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Build a Starwhale dataset:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli dataset build --yaml example/mnist/dataset.yaml\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Check your local Starwhale dataset:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli dataset list\nswcli dataset info mnist\nswcli dataset head mnist\n")))),(0,r.kt)("h2",{id:"running-an-evaluation-job"},"Running an Evaluation Job"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Create an evaluation job:"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Check the evaluation result"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli job list\nswcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)\n")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Congratulations! 
You have completed the Starwhale Standalone Getting Started Guide.")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/42d9f35f.9ef03e55.js b/assets/js/42d9f35f.9ef03e55.js new file mode 100644 index 000000000..ad472a047 --- /dev/null +++ b/assets/js/42d9f35f.9ef03e55.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[6435],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>c});var n=a(7294);function l(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(l[a]=e[a]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(l[a]=e[a])}return l}var p=n.createContext({}),s=function(e){var t=n.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},d=function(e){var t=s(e.components);return n.createElement(p.Provider,{value:t},e.children)},m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,l=e.mdxType,r=e.originalType,p=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),u=s(a),c=l,h=u["".concat(p,".").concat(c)]||u[c]||m[c]||r;return a?n.createElement(h,i(i({ref:t},d),{},{components:a})):n.createElement(h,i({ref:t},d))}));function c(e,t){var a=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=a.length,i=new Array(r);i[0]=u;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o.mdxType="string"==typeof e?e:l,i[1]=o;for(var s=2;s{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var 
n=a(3117),l=(a(7294),a(3905));const r={title:"Getting started with Starwhale Standalone"},i=void 0,o={unversionedId:"getting-started/standalone",id:"getting-started/standalone",title:"Getting started with Starwhale Standalone",description:"When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.",source:"@site/docs/getting-started/standalone.md",sourceDirName:"getting-started",slug:"/getting-started/standalone",permalink:"/next/getting-started/standalone",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/standalone.md",tags:[],version:"current",frontMatter:{title:"Getting started with Starwhale Standalone"},sidebar:"mainSidebar",previous:{title:"Getting started",permalink:"/next/getting-started/"},next:{title:"Getting started with Starwhale Server",permalink:"/next/getting-started/server"}},p={},s=[{value:"Downloading Examples",id:"downloading-examples",level:2},{value:"Building Starwhale Runtime",id:"building-starwhale-runtime",level:2},{value:"Building a Model",id:"building-a-model",level:2},{value:"Building a Dataset",id:"building-a-dataset",level:2},{value:"Running an Evaluation Job",id:"running-an-evaluation-job",level:2}],d={toc:s};function m(e){let{components:t,...a}=e;return(0,l.kt)("wrapper",(0,n.Z)({},d,a,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("p",null,"When the ",(0,l.kt)("a",{parentName:"p",href:"../swcli/"},"Starwhale Client (swcli)")," is installed, you are ready to use Starwhale Standalone."),(0,l.kt)("p",null,"We also provide a Jupyter Notebook example, you can try it in ",(0,l.kt)("a",{parentName:"p",href:"https://colab.research.google.com/github/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"Google Colab")," or in your local ",(0,l.kt)("a",{parentName:"p",href:"https://github.com/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"vscode/jupyterlab"),"."),(0,l.kt)("h2",{id:"downloading-examples"},"Downloading 
Examples"),(0,l.kt)("p",null,"Download Starwhale examples by cloning the Starwhale project via:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1\ncd starwhale\n")),(0,l.kt)("p",null,"To save time in the example downloading, we skip git-lfs and other commits info. We will use ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory."),(0,l.kt)("p",null,(0,l.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/standalone-core-workflow.gif",alt:"Core Workflow"})),(0,l.kt)("h2",{id:"building-starwhale-runtime"},"Building Starwhale Runtime"),(0,l.kt)("p",null,"Runtime example codes are in the ",(0,l.kt)("inlineCode",{parentName:"p"},"example/helloworld")," directory."),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Build the Starwhale runtime bundle:"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv runtime build --yaml example/helloworld/runtime.yaml\n")),(0,l.kt)("admonition",{parentName:"li",type:"tip"},(0,l.kt)("p",{parentName:"admonition"},"When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. 
Using the befitting pypi mirror and cache config in the ",(0,l.kt)("inlineCode",{parentName:"p"},"~/.pip/pip.conf")," file is a recommended practice."),(0,l.kt)("p",{parentName:"admonition"},"For users in the mainland of China, the following conf file is an option:"),(0,l.kt)("pre",{parentName:"admonition"},(0,l.kt)("code",{parentName:"pre",className:"language-conf"},"[global]\ncache-dir = ~/.cache/pip\nindex-url = https://pypi.tuna.tsinghua.edu.cn/simple\nextra-index-url = https://mirrors.aliyun.com/pypi/simple/\n")))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Check your local Starwhale Runtime:"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime list\nswcli runtime info helloworld\n")))),(0,l.kt)("h2",{id:"building-a-model"},"Building a Model"),(0,l.kt)("p",null,"Model example codes are in the ",(0,l.kt)("inlineCode",{parentName:"p"},"example/helloworld")," directory."),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Build a Starwhale model:"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv model build example/helloworld --name helloworld -m evaluation --runtime helloworld\n"))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Check your local Starwhale models:"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model list\nswcli model info helloworld\n")))),(0,l.kt)("h2",{id:"building-a-dataset"},"Building a Dataset"),(0,l.kt)("p",null,"Dataset example codes are in the ",(0,l.kt)("inlineCode",{parentName:"p"},"example/helloworld")," directory."),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Build a Starwhale dataset:"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime activate helloworld\npython3 
example/helloworld/dataset.py\ndeactivate\n"))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Check your local Starwhale dataset:"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli dataset list\nswcli dataset info mnist64\nswcli dataset head mnist64\n")))),(0,l.kt)("h2",{id:"running-an-evaluation-job"},"Running an Evaluation Job"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Create an evaluation job:"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv model run --uri helloworld --dataset mnist64 --runtime helloworld\n"))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"Check the evaluation result"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli job list\nswcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)\n")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Congratulations! 
You have completed the Starwhale Standalone Getting Started Guide.")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e9fbe6ff.a91515bb.js b/assets/js/e9fbe6ff.644fbef9.js similarity index 71% rename from assets/js/e9fbe6ff.a91515bb.js rename to assets/js/e9fbe6ff.644fbef9.js index 3bbeb5761..f183a88c1 100644 --- a/assets/js/e9fbe6ff.a91515bb.js +++ b/assets/js/e9fbe6ff.644fbef9.js @@ -1 +1 @@ -"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[8620],{3905:(e,t,r)=>{r.d(t,{Zo:()=>u,kt:()=>h});var a=r(7294);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function l(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),c=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):l(l({},t),e)),r},u=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},p=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,o=e.originalType,s=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=c(r),h=n,m=p["".concat(s,".").concat(h)]||p[h]||d[h]||o;return r?a.createElement(m,l(l({ref:t},u),{},{components:r})):a.createElement(m,l({ref:t},u))}));function h(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var o=r.length,l=new Array(o);l[0]=p;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i.mdxType="string"==typeof e?e:n,l[1]=i;for(var 
c=2;c{r.r(t),r.d(t,{assets:()=>s,contentTitle:()=>l,default:()=>d,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var a=r(3117),n=(r(7294),r(3905));const o={title:"Getting started with Starwhale Server"},l=void 0,i={unversionedId:"getting-started/server",id:"getting-started/server",title:"Getting started with Starwhale Server",description:"Install Starwhale Server",source:"@site/docs/getting-started/server.md",sourceDirName:"getting-started",slug:"/getting-started/server",permalink:"/next/getting-started/server",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/server.md",tags:[],version:"current",frontMatter:{title:"Getting started with Starwhale Server"},sidebar:"mainSidebar",previous:{title:"Getting started with Starwhale Standalone",permalink:"/next/getting-started/standalone"},next:{title:"Getting started with Starwhale Cloud",permalink:"/next/getting-started/cloud"}},s={},c=[{value:"Install Starwhale Server",id:"install-starwhale-server",level:2},{value:"Create your first project",id:"create-your-first-project",level:2},{value:"Login to the server",id:"login-to-the-server",level:3},{value:"Create a new project",id:"create-a-new-project",level:3},{value:"Build the dataset, model, and runtime on your local machine",id:"build-the-dataset-model-and-runtime-on-your-local-machine",level:2},{value:"Copy the dataset, the model, and the runtime to the server",id:"copy-the-dataset-the-model-and-the-runtime-to-the-server",level:2},{value:"Use the Web UI to run an evaluation",id:"use-the-web-ui-to-run-an-evaluation",level:2}],u={toc:c};function d(e){let{components:t,...r}=e;return(0,n.kt)("wrapper",(0,a.Z)({},u,r,{components:t,mdxType:"MDXLayout"}),(0,n.kt)("h2",{id:"install-starwhale-server"},"Install Starwhale Server"),(0,n.kt)("p",null,"To install Starwhale Server, see the ",(0,n.kt)("a",{parentName:"p",href:"/next/server/installation/"},"installation guide"),"."),(0,n.kt)("h2",{id:"create-your-first-project"},"Create your first 
project"),(0,n.kt)("h3",{id:"login-to-the-server"},"Login to the server"),(0,n.kt)("p",null,"Open your browser and enter your server's URL in the address bar. Login with your username(starwhale) and password(abcd1234)."),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-artifacts.gif",alt:"console-artifacts.gif"})),(0,n.kt)("h3",{id:"create-a-new-project"},"Create a new project"),(0,n.kt)("h2",{id:"build-the-dataset-model-and-runtime-on-your-local-machine"},"Build the dataset, model, and runtime on your local machine"),(0,n.kt)("p",null,"Follow step 1 to step 4 in ",(0,n.kt)("a",{parentName:"p",href:"standalone"},"Getting started with Starwhale Standalone")," to create:"),(0,n.kt)("ul",null,(0,n.kt)("li",{parentName:"ul"},"a Starwhale model named mnist"),(0,n.kt)("li",{parentName:"ul"},"a Starwhale dataset named mnist"),(0,n.kt)("li",{parentName:"ul"},"a Starwhale runtime named pytorch")),(0,n.kt)("h2",{id:"copy-the-dataset-the-model-and-the-runtime-to-the-server"},"Copy the dataset, the model, and the runtime to the server"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username --password --alias server \n\nswcli model copy mnist server/project/demo\nswcli dataset copy mnist server/project/demo\nswcli runtime copy pytorch server/project/demo\n")),(0,n.kt)("h2",{id:"use-the-web-ui-to-run-an-evaluation"},"Use the Web UI to run an evaluation"),(0,n.kt)("p",null,'Navigate to the "demo" project in your browser and create a new one.'),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,n.kt)("p",null,(0,n.kt)("strong",{parentName:"p"},"Congratulations! 
You have completed the Starwhale Server Getting Started Guide.")))}d.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[8620],{3905:(e,t,r)=>{r.d(t,{Zo:()=>d,kt:()=>h});var a=r(7294);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function l(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),c=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):l(l({},t),e)),r},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},p=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,o=e.originalType,s=e.parentName,d=i(e,["components","mdxType","originalType","parentName"]),p=c(r),h=n,m=p["".concat(s,".").concat(h)]||p[h]||u[h]||o;return r?a.createElement(m,l(l({ref:t},d),{},{components:r})):a.createElement(m,l({ref:t},d))}));function h(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var o=r.length,l=new Array(o);l[0]=p;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i.mdxType="string"==typeof e?e:n,l[1]=i;for(var c=2;c{r.r(t),r.d(t,{assets:()=>s,contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var a=r(3117),n=(r(7294),r(3905));const o={title:"Getting started with Starwhale Server"},l=void 0,i={unversionedId:"getting-started/server",id:"getting-started/server",title:"Getting started with 
Starwhale Server",description:"Install Starwhale Server",source:"@site/docs/getting-started/server.md",sourceDirName:"getting-started",slug:"/getting-started/server",permalink:"/next/getting-started/server",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/server.md",tags:[],version:"current",frontMatter:{title:"Getting started with Starwhale Server"},sidebar:"mainSidebar",previous:{title:"Getting started with Starwhale Standalone",permalink:"/next/getting-started/standalone"},next:{title:"Getting started with Starwhale Cloud",permalink:"/next/getting-started/cloud"}},s={},c=[{value:"Install Starwhale Server",id:"install-starwhale-server",level:2},{value:"Create your first project",id:"create-your-first-project",level:2},{value:"Login to the server",id:"login-to-the-server",level:3},{value:"Create a new project",id:"create-a-new-project",level:3},{value:"Build the dataset, model, and runtime on your local machine",id:"build-the-dataset-model-and-runtime-on-your-local-machine",level:2},{value:"Copy the dataset, the model, and the runtime to the server",id:"copy-the-dataset-the-model-and-the-runtime-to-the-server",level:2},{value:"Use the Web UI to run an evaluation",id:"use-the-web-ui-to-run-an-evaluation",level:2}],d={toc:c};function u(e){let{components:t,...r}=e;return(0,n.kt)("wrapper",(0,a.Z)({},d,r,{components:t,mdxType:"MDXLayout"}),(0,n.kt)("h2",{id:"install-starwhale-server"},"Install Starwhale Server"),(0,n.kt)("p",null,"To install Starwhale Server, see the ",(0,n.kt)("a",{parentName:"p",href:"/next/server/installation/"},"installation guide"),"."),(0,n.kt)("h2",{id:"create-your-first-project"},"Create your first project"),(0,n.kt)("h3",{id:"login-to-the-server"},"Login to the server"),(0,n.kt)("p",null,"Open your browser and enter your server's URL in the address bar. 
Login with your username(starwhale) and password(abcd1234)."),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-artifacts.gif",alt:"console-artifacts.gif"})),(0,n.kt)("h3",{id:"create-a-new-project"},"Create a new project"),(0,n.kt)("h2",{id:"build-the-dataset-model-and-runtime-on-your-local-machine"},"Build the dataset, model, and runtime on your local machine"),(0,n.kt)("p",null,"Follow step 1 to step 4 in ",(0,n.kt)("a",{parentName:"p",href:"standalone"},"Getting started with Starwhale Standalone")," to create:"),(0,n.kt)("ul",null,(0,n.kt)("li",{parentName:"ul"},"a Starwhale model named helloworld"),(0,n.kt)("li",{parentName:"ul"},"a Starwhale dataset named mnist64"),(0,n.kt)("li",{parentName:"ul"},"a Starwhale runtime named helloworld")),(0,n.kt)("h2",{id:"copy-the-dataset-the-model-and-the-runtime-to-the-server"},"Copy the dataset, the model, and the runtime to the server"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username --password --alias server \n\nswcli model copy helloworld server/project/demo\nswcli dataset copy mnist64 server/project/demo\nswcli runtime copy helloworld server/project/demo\n")),(0,n.kt)("h2",{id:"use-the-web-ui-to-run-an-evaluation"},"Use the Web UI to run an evaluation"),(0,n.kt)("p",null,'Navigate to the "demo" project in your browser and create a new one.'),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,n.kt)("p",null,(0,n.kt)("strong",{parentName:"p"},"Congratulations! 
You have completed the Starwhale Server Getting Started Guide.")))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/runtime~main.08eddf9a.js b/assets/js/runtime~main.4ebe5f19.js similarity index 98% rename from assets/js/runtime~main.08eddf9a.js rename to assets/js/runtime~main.4ebe5f19.js index cf3beb9a6..43063195d 100644 --- a/assets/js/runtime~main.08eddf9a.js +++ b/assets/js/runtime~main.4ebe5f19.js @@ -1 +1 @@ -(()=>{"use strict";var e,f,a,b,d,c={},t={};function r(e){var f=t[e];if(void 0!==f)return f.exports;var a=t[e]={exports:{}};return c[e].call(a.exports,a,a.exports,r),a.exports}r.m=c,e=[],r.O=(f,a,b,d)=>{if(!a){var c=1/0;for(i=0;i=d)&&Object.keys(r.O).every((e=>r.O[e](a[o])))?a.splice(o--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[a,b,d]},r.n=e=>{var f=e&&e.__esModule?()=>e.default:()=>e;return r.d(f,{a:f}),f},a=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,b){if(1&b&&(e=this(e)),8&b)return e;if("object"==typeof e&&e){if(4&b&&e.__esModule)return e;if(16&b&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var c={};f=f||[null,a({}),a([]),a(a)];for(var t=2&b&&e;"object"==typeof t&&!~f.indexOf(t);t=a(t))Object.getOwnPropertyNames(t).forEach((f=>c[f]=()=>e[f]));return c.default=()=>e,r.d(d,c),d},r.d=(e,f)=>{for(var a in 
f)r.o(f,a)&&!r.o(e,a)&&Object.defineProperty(e,a,{enumerable:!0,get:f[a]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((f,a)=>(r.f[a](e,f),f)),[])),r.u=e=>"assets/js/"+({53:"935f2afb",70:"b4266ab5",74:"3762e359",75:"93232c32",125:"cc78421f",137:"e6b210f1",143:"ae57ea02",168:"71ff360b",190:"ba836e7d",232:"56d53d53",304:"3480b943",379:"b5684a7b",436:"27414590",474:"24a48cd0",489:"f972728b",503:"5eb6fda8",527:"ffff3183",533:"b2b675dd",556:"2729f289",705:"bc50734c",758:"023934a0",790:"207b7cff",838:"44b1cbe2",846:"f7950235",904:"2545d4b6",953:"bae51714",988:"9bdeab26",996:"39af834a",1002:"fe6343fd",1013:"91ab3747",1014:"81c352c7",1023:"be6c2ff2",1061:"cbad36d9",1094:"07b3ca0b",1120:"eeb12725",1128:"0513b4b7",1140:"91edb5cf",1143:"689aaa3d",1171:"d9beab61",1224:"a56f2bca",1237:"2e5465c5",1257:"03431110",1368:"986a7b24",1394:"5a391425",1405:"49b4d3fe",1433:"c3542997",1434:"5f38f66e",1435:"7684512e",1477:"b2f554cd",1484:"25243afb",1538:"19af0c64",1579:"521740bf",1655:"899d5fe0",1678:"923434ee",1684:"93f00860",1689:"f6ac3114",1690:"e4b75637",1695:"ff74d3da",1713:"a7023ddc",1722:"fe1b78e1",1745:"d7efef2f",1756:"0661cc41",1793:"07e54361",1840:"6cbd7e7d",1857:"bd7d9199",1913:"373b159b",1933:"8195011d",1947:"fbde1876",1955:"2a436d5c",1985:"ef1be1e1",1988:"9f58059d",2106:"8f11fbb5",2112:"bde18961",2115:"a0a891b7",2150:"4f7fe039",2157:"0b0df7a2",2161:"90d098bd",2165:"e53d3ff9",2286:"35b14ade",2308:"e4f6b8e1",2313:"b879cbc2",2329:"b40c3376",2366:"d832a854",2416:"06309b2a",2493:"58f10d9f",2500:"4f907a97",2523:"2e0ef41a",2529:"a23fbffc",2535:"814f3328",2615:"116fecd1",2669:"963797ee",2685:"0c3559ad",2739:"61356d5d",2748:"3853ad19",2818:"8de92970",2828:"74d883b7",2995:"af0debe5",3006:"1b7cc7bc",3018:"d15112ec",3057:"9b79081a",3080:"c49571eb",3089:"a6aa9e1f",3154:"e1105187",3189:"4511f06b",3215:"236d8693",3228:"481b4727",3229:"7f26efeb",3231:"1329fb4f",3233:"32528799",3288:"5648656a",3321:"0dbd89b2",3368:"a3eb7131",3369:"4f239cc1",3420:"00ebe6f9",3492:"f5b6cb08",3559:"0bd945fa
",3608:"9e4087bc",3667:"f6fec203",3727:"f1e36233",3840:"73c5427a",3859:"bca0dfde",3881:"589c66ec",3918:"c592492b",3919:"5d3ff7ab",3952:"6edc6741",4007:"77daf463",4008:"1a1c0fb0",4011:"9b1574cb",4013:"01a85c17",4034:"56383101",4060:"2914bf67",4106:"bd9c0894",4121:"6b5d17d2",4130:"d34d6740",4193:"2572f700",4198:"f3650d5d",4230:"327c535f",4272:"a5adff03",4319:"b3c9b7e8",4322:"ddb3d303",4351:"7beeba1c",4355:"22ced7ff",4433:"d218f8f7",4446:"3b7875ca",4447:"d83ef4b5",4457:"fcfb8e31",4472:"3f90d064",4475:"4012ba53",4527:"8ba41740",4528:"f012d72b",4535:"e36a0948",4537:"9cf37abf",4540:"1cd68b1e",4567:"20dac1bd",4571:"5936e3f8",4608:"3818d7db",4720:"34f595d3",4758:"38311505",4805:"26df6cbd",4848:"d29f8d9b",4856:"bed23bc4",4885:"5f0d6fdb",4893:"d22055dc",4896:"7a8da0ce",4927:"6f99d302",4934:"88015853",4941:"1efab335",4948:"c3c8b115",5032:"d3fd6aa5",5057:"5c2ad240",5073:"63f3ccc1",5101:"959b44ee",5113:"18c9cfd4",5133:"ffe4100e",5150:"dbe33f09",5173:"7d733c18",5182:"4645ad56",5208:"a25d6fd7",5210:"ca3e8775",5258:"c0d50cc0",5281:"1cda5aa6",5387:"dcfe1bde",5416:"fb1f8cbb",5451:"b7557c51",5491:"d2453d90",5536:"5a4ad223",5620:"8e04f48d",5689:"c2728190",5761:"ab388152",5762:"877d4050",5870:"ac51e66e",5875:"c0fee9fd",5881:"78d62bd9",5938:"5bb31039",5995:"aa10845f",6020:"4db21eee",6088:"88988c18",6103:"ccc49370",6208:"486179e1",6230:"c886740e",6235:"57f5c722",6237:"9ba654c2",6251:"0430ce14",6318:"1609ca8c",6329:"54c82979",6336:"6763b9d9",6338:"4528a46e",6366:"6e1f8ce6",6392:"f6305a2a",6398:"aefeddaf",6425:"f051bb65",6435:"42d9f35f",6443:"65c6927d",6508:"797023eb",6612:"ec8a462b",6618:"2d40f4be",6705:"4888691f",6736:"afc2f83f",6765:"a1612d77",6792:"2ea1e391",6809:"2defc614",6849:"4b44443b",6862:"3521e0c7",6869:"8fb4711f",6881:"cfd4e1da",6909:"8e3c9231",6912:"51c1bc08",6966:"552162b0",7054:"5431a54b",7181:"fa377e30",7183:"32cba7ce",7197:"e8d59815",7199:"7a93542f",7208:"5b72acc5",7220:"e7c33aac",7346:"f2cc7669",7372:"8b2d4da3",7392:"afaa6f85",7400:"19623007",7490:"fbf0a0a7",7567:"df36ecd4
",7647:"1ddcdff5",7656:"80151786",7659:"83e43ff1",7697:"50417919",7749:"3160d5c7",7796:"e14c639a",7813:"377d34c2",7830:"568f204d",7833:"74882eab",7854:"40e2e448",7904:"206e8b40",7918:"17896441",7968:"ead21b0a",7983:"b251fb47",7996:"b4161e04",8018:"78886a16",8097:"f8292b17",8114:"41c3269f",8158:"1cd0502b",8242:"272c7b59",8244:"305f83c8",8271:"1c091541",8279:"9d1c829d",8309:"ac72f4d5",8333:"aa126475",8337:"208d09d7",8419:"fa7c6226",8461:"a32436d0",8472:"51f472b9",8481:"5e73aff3",8592:"70926518",8610:"6875c492",8619:"605a1123",8620:"e9fbe6ff",8634:"97affa74",8685:"6f13de77",8766:"4696e759",8857:"68ba87f2",8876:"99977c84",8884:"c757b298",8963:"f3f1a75b",9009:"7578b5f6",9015:"eb575f18",9033:"9dc553d4",9130:"f35e473a",9160:"feec69fc",9205:"7c8cfcaa",9221:"35a1304b",9247:"a6703bbf",9255:"2d78f039",9322:"f2534a3f",9324:"e2cfa70e",9334:"247783bb",9340:"74da7579",9382:"1daa9b51",9391:"cc120547",9404:"9f104ddb",9487:"7eb32d37",9514:"1be78505",9546:"e44ab7b1",9680:"d567a5f3",9687:"978f5c7d",9706:"cddb67a8",9716:"286cdff1",9726:"0ca68b49",9774:"43b1a21e",9824:"fe1659de",9859:"06024424",9874:"febe53b7",9875:"7d188f18",9878:"3b5b6856",9914:"29713cec",9927:"53e20daa",9945:"347c37ac"}[e]||e)+"."+{53:"85bd2ad0",70:"a79cc619",74:"87ad580e",75:"ba49f655",125:"ee166a0d",137:"b5f453ea",143:"0b3ce843",168:"8f759dee",190:"d5d394c6",232:"68d2210f",304:"444b49c9",379:"eedec262",436:"480cabac",474:"4a050319",489:"23adbdd8",503:"aef40381",527:"0e0ecb22",533:"d88e0aaf",556:"d3f2db64",705:"cd7b8395",758:"a3c06b83",790:"380e80e6",838:"640d37fc",846:"727c9384",904:"bc96a6bb",953:"e4ce4a2b",988:"ee8b1062",996:"286c6647",1002:"e6f443a6",1013:"3809f8e7",1014:"fcf2bffa",1023:"28dc2d23",1061:"7ea2864f",1094:"5dab0728",1120:"4f6f2aea",1128:"65ce1a56",1140:"00516883",1143:"4aa6b232",1171:"22b58cfd",1224:"e047a4aa",1237:"d347a96c",1257:"f3d175d1",1368:"c2a98948",1394:"d205670c",1405:"d65dd38b",1433:"3d44c209",1434:"40edff1c",1435:"a7d2d575",1477:"fdbd6fcf",1484:"a6301a44",1538:"a07a5df3",1579:"b46d5049",1
655:"f58a3749",1678:"da94a1a2",1684:"15190a6e",1689:"f1e4ca63",1690:"6f99d9a7",1695:"773cf80b",1713:"76202190",1722:"9d24017b",1745:"49386147",1756:"504542ca",1793:"2db2c7f2",1840:"858dbec8",1857:"564089b8",1913:"af9a2108",1933:"9f9f4633",1947:"94fdf9d0",1955:"46a13fb4",1985:"3301c264",1988:"a61c95d1",2106:"e7fd625d",2112:"678faea1",2115:"282b0f22",2150:"552e629c",2157:"44e55818",2161:"d1c3f115",2165:"3a7fdbb2",2286:"641cef90",2308:"f587da2e",2313:"0518b035",2329:"2ea6f5de",2366:"adf72713",2416:"12db4505",2493:"3a75331c",2500:"88539fd0",2523:"0e7ae1f2",2529:"552f8bdf",2535:"43468b6f",2615:"a546b37b",2669:"e18c6896",2685:"22362746",2739:"83e9f664",2748:"30f9b15d",2818:"93e3f3be",2828:"1a0fd7be",2995:"ff09bfcf",3006:"d5ffdd2d",3018:"d3d18586",3057:"68d75a2e",3080:"01ba5041",3089:"63fb042a",3154:"e226b3ec",3189:"49975c09",3215:"1fd6a106",3228:"bbb01b48",3229:"c17da2b2",3231:"c09cb887",3233:"b4c4f1e7",3288:"00d9a459",3321:"e6727e42",3368:"d2126698",3369:"c8149385",3420:"8f65ca77",3492:"77dde2ae",3559:"861d1cd6",3608:"b5df34c4",3667:"b8beac18",3727:"1d5ad730",3840:"dd501e0c",3859:"fbebe9d2",3881:"f6526055",3918:"fd6cf91c",3919:"ac9fa7d0",3952:"dc17346c",4007:"369d07e6",4008:"44997440",4011:"6486f29c",4013:"3480387e",4034:"76766c1d",4060:"3ad386a4",4106:"e14f178f",4121:"d5df942b",4130:"2f955dfe",4193:"4bdae46c",4198:"36f45c29",4230:"0bbadb02",4272:"bf82ef27",4319:"b3c51855",4322:"b8638c03",4351:"940a7a93",4355:"b019cd60",4433:"a5175022",4446:"89d6669a",4447:"561c12fc",4457:"42d275cb",4472:"68994fb4",4475:"3263c914",4527:"c95de201",4528:"3fd74978",4535:"908bf7a5",4537:"f48159e4",4540:"fafa4397",4567:"e9375ffa",4571:"cc486c9b",4608:"0c6e4d41",4720:"e6fd5ebc",4758:"4e45b796",4805:"4d0de1df",4848:"0aa90d3a",4856:"bd6ff808",4885:"73e3e49a",4893:"cd033837",4896:"b7e1346b",4927:"7b00ca95",4934:"a8b8a645",4941:"bb692913",4948:"cb5d07f7",4972:"fcd66616",5032:"a97e2ad7",5057:"f3e747ba",5073:"c99d1438",5101:"91ea1946",5113:"9c0d55ef",5133:"11018c6f",5150:"ccf2ee94",5173:"fd0380d6",5
182:"02cda888",5208:"856c8726",5210:"2ec59f11",5258:"1b02c088",5281:"3acaed66",5387:"fbc7080c",5416:"39d2c6e8",5451:"6a120073",5491:"edfc45f3",5536:"865c303a",5620:"5f24bdb0",5689:"89ee60ee",5761:"569d588c",5762:"46db3612",5870:"b410b0b7",5875:"21a38cc3",5881:"6c8ff055",5938:"4dc2a87d",5995:"3f0dd825",6020:"89dce336",6048:"341e3f6b",6088:"efa01055",6103:"6909e0f1",6208:"c536619f",6230:"cc140ea1",6235:"30d1f5ee",6237:"e505b091",6251:"0c5c366e",6318:"3705d074",6329:"25d32984",6336:"db937c1e",6338:"81e3d85f",6366:"c124a60e",6392:"878e8356",6398:"7ce9e498",6425:"b78ea8ec",6435:"71c9e63a",6443:"980dba48",6508:"ba631703",6612:"9b10c857",6618:"4b912f7d",6705:"c75edc81",6736:"1a84d8f4",6765:"22e6b1d2",6792:"f33527dc",6809:"56a01b31",6849:"0a89ad00",6862:"e2e0495a",6869:"b5364e1f",6881:"c2743313",6909:"f16e1b6e",6912:"37dfa78c",6966:"32f80aa9",7054:"cf2347e8",7181:"0659a7c8",7183:"74c7250f",7197:"626cced9",7199:"924d4be4",7208:"92ca1881",7220:"5c711f43",7346:"40065e23",7372:"60564700",7392:"0179230a",7400:"950813ce",7490:"4e93c0d9",7567:"e82440b5",7647:"64d03f63",7656:"6950c92b",7659:"297e84d6",7697:"280f4fc5",7749:"33bb4054",7796:"e02647c4",7813:"88beb2cf",7830:"180b0d92",7833:"9cb9fe64",7854:"48c391b2",7904:"24ede80b",7918:"df16294d",7968:"f58059ef",7983:"1c61b8de",7996:"021059d3",8018:"39e80a27",8097:"b68a870b",8114:"86cb582f",8158:"3a06108a",8242:"7efc152e",8244:"4b33707a",8271:"05841d70",8279:"4a118ab0",8309:"7482cfa6",8333:"cede7f8c",8337:"70e836a7",8357:"77417755",8419:"2a9654c3",8461:"030ce541",8472:"fc23dee2",8481:"c33e25af",8592:"6ebfe92f",8610:"14c8e2db",8619:"14f82cb1",8620:"a91515bb",8634:"45c89ee4",8685:"2fb3216a",8766:"14090488",8857:"553757d8",8876:"1e438729",8884:"dccb4039",8963:"aa069220",9009:"20ffe920",9015:"1743a961",9033:"d86494a3",9130:"66c14487",9160:"33c46af5",9205:"cdd130de",9221:"18815ffb",9247:"1f4e454d",9255:"7aad882f",9322:"76531c3a",9324:"d2024120",9334:"8199a693",9340:"fa6012b1",9382:"67a981a0",9391:"d957d197",9404:"6760c531",9487:"ae9e3f4a",9
514:"fc89e98a",9546:"f48dbc7f",9680:"4923e9c1",9687:"c9ff2d8b",9706:"9af06ac3",9716:"3bef1182",9726:"6539670d",9774:"16ab296c",9824:"ecab8fb0",9859:"e8730cf9",9874:"a43c45cc",9875:"e906463f",9878:"2eaa431b",9914:"524f1554",9927:"a50baa46",9945:"12b933b1"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,f)=>Object.prototype.hasOwnProperty.call(e,f),b={},d="starwhale-docs:",r.l=(e,f,a,c)=>{if(b[e])b[e].push(f);else{var t,o;if(void 0!==a)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var d=b[e];if(delete b[e],t.parentNode&&t.parentNode.removeChild(t),d&&d.forEach((e=>e(a))),f)return f(a)},s=setTimeout(l.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=l.bind(null,t.onerror),t.onload=l.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/",r.gca=function(e){return 
e={17896441:"7918",19623007:"7400",27414590:"436",32528799:"3233",38311505:"4758",50417919:"7697",56383101:"4034",70926518:"8592",80151786:"7656",88015853:"4934","935f2afb":"53",b4266ab5:"70","3762e359":"74","93232c32":"75",cc78421f:"125",e6b210f1:"137",ae57ea02:"143","71ff360b":"168",ba836e7d:"190","56d53d53":"232","3480b943":"304",b5684a7b:"379","24a48cd0":"474",f972728b:"489","5eb6fda8":"503",ffff3183:"527",b2b675dd:"533","2729f289":"556",bc50734c:"705","023934a0":"758","207b7cff":"790","44b1cbe2":"838",f7950235:"846","2545d4b6":"904",bae51714:"953","9bdeab26":"988","39af834a":"996",fe6343fd:"1002","91ab3747":"1013","81c352c7":"1014",be6c2ff2:"1023",cbad36d9:"1061","07b3ca0b":"1094",eeb12725:"1120","0513b4b7":"1128","91edb5cf":"1140","689aaa3d":"1143",d9beab61:"1171",a56f2bca:"1224","2e5465c5":"1237","03431110":"1257","986a7b24":"1368","5a391425":"1394","49b4d3fe":"1405",c3542997:"1433","5f38f66e":"1434","7684512e":"1435",b2f554cd:"1477","25243afb":"1484","19af0c64":"1538","521740bf":"1579","899d5fe0":"1655","923434ee":"1678","93f00860":"1684",f6ac3114:"1689",e4b75637:"1690",ff74d3da:"1695",a7023ddc:"1713",fe1b78e1:"1722",d7efef2f:"1745","0661cc41":"1756","07e54361":"1793","6cbd7e7d":"1840",bd7d9199:"1857","373b159b":"1913","8195011d":"1933",fbde1876:"1947","2a436d5c":"1955",ef1be1e1:"1985","9f58059d":"1988","8f11fbb5":"2106",bde18961:"2112",a0a891b7:"2115","4f7fe039":"2150","0b0df7a2":"2157","90d098bd":"2161",e53d3ff9:"2165","35b14ade":"2286",e4f6b8e1:"2308",b879cbc2:"2313",b40c3376:"2329",d832a854:"2366","06309b2a":"2416","58f10d9f":"2493","4f907a97":"2500","2e0ef41a":"2523",a23fbffc:"2529","814f3328":"2535","116fecd1":"2615","963797ee":"2669","0c3559ad":"2685","61356d5d":"2739","3853ad19":"2748","8de92970":"2818","74d883b7":"2828",af0debe5:"2995","1b7cc7bc":"3006",d15112ec:"3018","9b79081a":"3057",c49571eb:"3080",a6aa9e1f:"3089",e1105187:"3154","4511f06b":"3189","236d8693":"3215","481b4727":"3228","7f26efeb":"3229","1329fb4f":"3231","5648656a":"3288","0dbd89b2
":"3321",a3eb7131:"3368","4f239cc1":"3369","00ebe6f9":"3420",f5b6cb08:"3492","0bd945fa":"3559","9e4087bc":"3608",f6fec203:"3667",f1e36233:"3727","73c5427a":"3840",bca0dfde:"3859","589c66ec":"3881",c592492b:"3918","5d3ff7ab":"3919","6edc6741":"3952","77daf463":"4007","1a1c0fb0":"4008","9b1574cb":"4011","01a85c17":"4013","2914bf67":"4060",bd9c0894:"4106","6b5d17d2":"4121",d34d6740:"4130","2572f700":"4193",f3650d5d:"4198","327c535f":"4230",a5adff03:"4272",b3c9b7e8:"4319",ddb3d303:"4322","7beeba1c":"4351","22ced7ff":"4355",d218f8f7:"4433","3b7875ca":"4446",d83ef4b5:"4447",fcfb8e31:"4457","3f90d064":"4472","4012ba53":"4475","8ba41740":"4527",f012d72b:"4528",e36a0948:"4535","9cf37abf":"4537","1cd68b1e":"4540","20dac1bd":"4567","5936e3f8":"4571","3818d7db":"4608","34f595d3":"4720","26df6cbd":"4805",d29f8d9b:"4848",bed23bc4:"4856","5f0d6fdb":"4885",d22055dc:"4893","7a8da0ce":"4896","6f99d302":"4927","1efab335":"4941",c3c8b115:"4948",d3fd6aa5:"5032","5c2ad240":"5057","63f3ccc1":"5073","959b44ee":"5101","18c9cfd4":"5113",ffe4100e:"5133",dbe33f09:"5150","7d733c18":"5173","4645ad56":"5182",a25d6fd7:"5208",ca3e8775:"5210",c0d50cc0:"5258","1cda5aa6":"5281",dcfe1bde:"5387",fb1f8cbb:"5416",b7557c51:"5451",d2453d90:"5491","5a4ad223":"5536","8e04f48d":"5620",c2728190:"5689",ab388152:"5761","877d4050":"5762",ac51e66e:"5870",c0fee9fd:"5875","78d62bd9":"5881","5bb31039":"5938",aa10845f:"5995","4db21eee":"6020","88988c18":"6088",ccc49370:"6103","486179e1":"6208",c886740e:"6230","57f5c722":"6235","9ba654c2":"6237","0430ce14":"6251","1609ca8c":"6318","54c82979":"6329","6763b9d9":"6336","4528a46e":"6338","6e1f8ce6":"6366",f6305a2a:"6392",aefeddaf:"6398",f051bb65:"6425","42d9f35f":"6435","65c6927d":"6443","797023eb":"6508",ec8a462b:"6612","2d40f4be":"6618","4888691f":"6705",afc2f83f:"6736",a1612d77:"6765","2ea1e391":"6792","2defc614":"6809","4b44443b":"6849","3521e0c7":"6862","8fb4711f":"6869",cfd4e1da:"6881","8e3c9231":"6909","51c1bc08":"6912","552162b0":"6966","5431a54b":"7054",fa377e30:"7
181","32cba7ce":"7183",e8d59815:"7197","7a93542f":"7199","5b72acc5":"7208",e7c33aac:"7220",f2cc7669:"7346","8b2d4da3":"7372",afaa6f85:"7392",fbf0a0a7:"7490",df36ecd4:"7567","1ddcdff5":"7647","83e43ff1":"7659","3160d5c7":"7749",e14c639a:"7796","377d34c2":"7813","568f204d":"7830","74882eab":"7833","40e2e448":"7854","206e8b40":"7904",ead21b0a:"7968",b251fb47:"7983",b4161e04:"7996","78886a16":"8018",f8292b17:"8097","41c3269f":"8114","1cd0502b":"8158","272c7b59":"8242","305f83c8":"8244","1c091541":"8271","9d1c829d":"8279",ac72f4d5:"8309",aa126475:"8333","208d09d7":"8337",fa7c6226:"8419",a32436d0:"8461","51f472b9":"8472","5e73aff3":"8481","6875c492":"8610","605a1123":"8619",e9fbe6ff:"8620","97affa74":"8634","6f13de77":"8685","4696e759":"8766","68ba87f2":"8857","99977c84":"8876",c757b298:"8884",f3f1a75b:"8963","7578b5f6":"9009",eb575f18:"9015","9dc553d4":"9033",f35e473a:"9130",feec69fc:"9160","7c8cfcaa":"9205","35a1304b":"9221",a6703bbf:"9247","2d78f039":"9255",f2534a3f:"9322",e2cfa70e:"9324","247783bb":"9334","74da7579":"9340","1daa9b51":"9382",cc120547:"9391","9f104ddb":"9404","7eb32d37":"9487","1be78505":"9514",e44ab7b1:"9546",d567a5f3:"9680","978f5c7d":"9687",cddb67a8:"9706","286cdff1":"9716","0ca68b49":"9726","43b1a21e":"9774",fe1659de:"9824","06024424":"9859",febe53b7:"9874","7d188f18":"9875","3b5b6856":"9878","29713cec":"9914","53e20daa":"9927","347c37ac":"9945"}[e]||e,r.p+r.u(e)},(()=>{var e={1303:0,532:0};r.f.j=(f,a)=>{var b=r.o(e,f)?e[f]:void 0;if(0!==b)if(b)a.push(b[2]);else if(/^(1303|532)$/.test(f))e[f]=0;else{var d=new Promise(((a,d)=>b=e[f]=[a,d]));a.push(b[2]=d);var c=r.p+r.u(f),t=new Error;r.l(c,(a=>{if(r.o(e,f)&&(0!==(b=e[f])&&(e[f]=void 0),b)){var d=a&&("load"===a.type?"missing":a.type),c=a&&a.target&&a.target.src;t.message="Loading chunk "+f+" failed.\n("+d+": "+c+")",t.name="ChunkLoadError",t.type=d,t.request=c,b[1](t)}}),"chunk-"+f,f)}},r.O.j=f=>0===e[f];var f=(f,a)=>{var b,d,c=a[0],t=a[1],o=a[2],n=0;if(c.some((f=>0!==e[f]))){for(b in 
t)r.o(t,b)&&(r.m[b]=t[b]);if(o)var i=o(r)}for(f&&f(a);n{"use strict";var e,f,a,b,d,c={},t={};function r(e){var f=t[e];if(void 0!==f)return f.exports;var a=t[e]={exports:{}};return c[e].call(a.exports,a,a.exports,r),a.exports}r.m=c,e=[],r.O=(f,a,b,d)=>{if(!a){var c=1/0;for(i=0;i=d)&&Object.keys(r.O).every((e=>r.O[e](a[o])))?a.splice(o--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[a,b,d]},r.n=e=>{var f=e&&e.__esModule?()=>e.default:()=>e;return r.d(f,{a:f}),f},a=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,b){if(1&b&&(e=this(e)),8&b)return e;if("object"==typeof e&&e){if(4&b&&e.__esModule)return e;if(16&b&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var c={};f=f||[null,a({}),a([]),a(a)];for(var t=2&b&&e;"object"==typeof t&&!~f.indexOf(t);t=a(t))Object.getOwnPropertyNames(t).forEach((f=>c[f]=()=>e[f]));return c.default=()=>e,r.d(d,c),d},r.d=(e,f)=>{for(var a in f)r.o(f,a)&&!r.o(e,a)&&Object.defineProperty(e,a,{enumerable:!0,get:f[a]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((f,a)=>(r.f[a](e,f),f)),[])),r.u=e=>"assets/js/"+({53:"935f2afb",70:"b4266ab5",74:"3762e359",75:"93232c32",125:"cc78421f",137:"e6b210f1",143:"ae57ea02",168:"71ff360b",190:"ba836e7d",232:"56d53d53",304:"3480b943",379:"b5684a7b",436:"27414590",474:"24a48cd0",489:"f972728b",503:"5eb6fda8",527:"ffff3183",533:"b2b675dd",556:"2729f289",705:"bc50734c",758:"023934a0",790:"207b7cff",838:"44b1cbe2",846:"f7950235",904:"2545d4b6",953:"bae51714",988:"9bdeab26",996:"39af834a",1002:"fe6343fd",1013:"91ab3747",1014:"81c352c7",1023:"be6c2ff2",1061:"cbad36d9",1094:"07b3ca0b",1120:"eeb12725",1128:"0513b4b7",1140:"91edb5cf",1143:"689aaa3d",1171:"d9beab61",1224:"a56f2bca",1237:"2e5465c5",1257:"03431110",1368:"986a7b24",1394:"5a391425",1405:"49b4d3fe",1433:"c3542997",1434:"5f38f66e",1435:"7684512e",1477:"b2f554cd",1484:"25243afb",1538:"19af0c64",1579:"521740bf",1655:"899d5fe0",1678:"923434ee",1684:"93f00860",1689:"f6ac3114",1690:"e4b75637",1695:"ff74
d3da",1713:"a7023ddc",1722:"fe1b78e1",1745:"d7efef2f",1756:"0661cc41",1793:"07e54361",1840:"6cbd7e7d",1857:"bd7d9199",1913:"373b159b",1933:"8195011d",1947:"fbde1876",1955:"2a436d5c",1985:"ef1be1e1",1988:"9f58059d",2106:"8f11fbb5",2112:"bde18961",2115:"a0a891b7",2150:"4f7fe039",2157:"0b0df7a2",2161:"90d098bd",2165:"e53d3ff9",2286:"35b14ade",2308:"e4f6b8e1",2313:"b879cbc2",2329:"b40c3376",2366:"d832a854",2416:"06309b2a",2493:"58f10d9f",2500:"4f907a97",2523:"2e0ef41a",2529:"a23fbffc",2535:"814f3328",2615:"116fecd1",2669:"963797ee",2685:"0c3559ad",2739:"61356d5d",2748:"3853ad19",2818:"8de92970",2828:"74d883b7",2995:"af0debe5",3006:"1b7cc7bc",3018:"d15112ec",3057:"9b79081a",3080:"c49571eb",3089:"a6aa9e1f",3154:"e1105187",3189:"4511f06b",3215:"236d8693",3228:"481b4727",3229:"7f26efeb",3231:"1329fb4f",3233:"32528799",3288:"5648656a",3321:"0dbd89b2",3368:"a3eb7131",3369:"4f239cc1",3420:"00ebe6f9",3492:"f5b6cb08",3559:"0bd945fa",3608:"9e4087bc",3667:"f6fec203",3727:"f1e36233",3840:"73c5427a",3859:"bca0dfde",3881:"589c66ec",3918:"c592492b",3919:"5d3ff7ab",3952:"6edc6741",4007:"77daf463",4008:"1a1c0fb0",4011:"9b1574cb",4013:"01a85c17",4034:"56383101",4060:"2914bf67",4106:"bd9c0894",4121:"6b5d17d2",4130:"d34d6740",4193:"2572f700",4198:"f3650d5d",4230:"327c535f",4272:"a5adff03",4319:"b3c9b7e8",4322:"ddb3d303",4351:"7beeba1c",4355:"22ced7ff",4433:"d218f8f7",4446:"3b7875ca",4447:"d83ef4b5",4457:"fcfb8e31",4472:"3f90d064",4475:"4012ba53",4527:"8ba41740",4528:"f012d72b",4535:"e36a0948",4537:"9cf37abf",4540:"1cd68b1e",4567:"20dac1bd",4571:"5936e3f8",4608:"3818d7db",4720:"34f595d3",4758:"38311505",4805:"26df6cbd",4848:"d29f8d9b",4856:"bed23bc4",4885:"5f0d6fdb",4893:"d22055dc",4896:"7a8da0ce",4927:"6f99d302",4934:"88015853",4941:"1efab335",4948:"c3c8b115",5032:"d3fd6aa5",5057:"5c2ad240",5073:"63f3ccc1",5101:"959b44ee",5113:"18c9cfd4",5133:"ffe4100e",5150:"dbe33f09",5173:"7d733c18",5182:"4645ad56",5208:"a25d6fd7",5210:"ca3e8775",5258:"c0d50cc0",5281:"1cda5aa6",5387:"dcfe1bde",5416:"fb1f
8cbb",5451:"b7557c51",5491:"d2453d90",5536:"5a4ad223",5620:"8e04f48d",5689:"c2728190",5761:"ab388152",5762:"877d4050",5870:"ac51e66e",5875:"c0fee9fd",5881:"78d62bd9",5938:"5bb31039",5995:"aa10845f",6020:"4db21eee",6088:"88988c18",6103:"ccc49370",6208:"486179e1",6230:"c886740e",6235:"57f5c722",6237:"9ba654c2",6251:"0430ce14",6318:"1609ca8c",6329:"54c82979",6336:"6763b9d9",6338:"4528a46e",6366:"6e1f8ce6",6392:"f6305a2a",6398:"aefeddaf",6425:"f051bb65",6435:"42d9f35f",6443:"65c6927d",6508:"797023eb",6612:"ec8a462b",6618:"2d40f4be",6705:"4888691f",6736:"afc2f83f",6765:"a1612d77",6792:"2ea1e391",6809:"2defc614",6849:"4b44443b",6862:"3521e0c7",6869:"8fb4711f",6881:"cfd4e1da",6909:"8e3c9231",6912:"51c1bc08",6966:"552162b0",7054:"5431a54b",7181:"fa377e30",7183:"32cba7ce",7197:"e8d59815",7199:"7a93542f",7208:"5b72acc5",7220:"e7c33aac",7346:"f2cc7669",7372:"8b2d4da3",7392:"afaa6f85",7400:"19623007",7490:"fbf0a0a7",7567:"df36ecd4",7647:"1ddcdff5",7656:"80151786",7659:"83e43ff1",7697:"50417919",7749:"3160d5c7",7796:"e14c639a",7813:"377d34c2",7830:"568f204d",7833:"74882eab",7854:"40e2e448",7904:"206e8b40",7918:"17896441",7968:"ead21b0a",7983:"b251fb47",7996:"b4161e04",8018:"78886a16",8097:"f8292b17",8114:"41c3269f",8158:"1cd0502b",8242:"272c7b59",8244:"305f83c8",8271:"1c091541",8279:"9d1c829d",8309:"ac72f4d5",8333:"aa126475",8337:"208d09d7",8419:"fa7c6226",8461:"a32436d0",8472:"51f472b9",8481:"5e73aff3",8592:"70926518",8610:"6875c492",8619:"605a1123",8620:"e9fbe6ff",8634:"97affa74",8685:"6f13de77",8766:"4696e759",8857:"68ba87f2",8876:"99977c84",8884:"c757b298",8963:"f3f1a75b",9009:"7578b5f6",9015:"eb575f18",9033:"9dc553d4",9130:"f35e473a",9160:"feec69fc",9205:"7c8cfcaa",9221:"35a1304b",9247:"a6703bbf",9255:"2d78f039",9322:"f2534a3f",9324:"e2cfa70e",9334:"247783bb",9340:"74da7579",9382:"1daa9b51",9391:"cc120547",9404:"9f104ddb",9487:"7eb32d37",9514:"1be78505",9546:"e44ab7b1",9680:"d567a5f3",9687:"978f5c7d",9706:"cddb67a8",9716:"286cdff1",9726:"0ca68b49",9774:"43b1a21e",9824:"fe16
59de",9859:"06024424",9874:"febe53b7",9875:"7d188f18",9878:"3b5b6856",9914:"29713cec",9927:"53e20daa",9945:"347c37ac"}[e]||e)+"."+{53:"85bd2ad0",70:"a79cc619",74:"87ad580e",75:"ba49f655",125:"ee166a0d",137:"b5f453ea",143:"0b3ce843",168:"8f759dee",190:"d5d394c6",232:"68d2210f",304:"444b49c9",379:"eedec262",436:"480cabac",474:"4a050319",489:"23adbdd8",503:"aef40381",527:"0e0ecb22",533:"d88e0aaf",556:"d3f2db64",705:"cd7b8395",758:"a3c06b83",790:"380e80e6",838:"640d37fc",846:"727c9384",904:"bc96a6bb",953:"e4ce4a2b",988:"ee8b1062",996:"286c6647",1002:"e6f443a6",1013:"3809f8e7",1014:"fcf2bffa",1023:"28dc2d23",1061:"7ea2864f",1094:"5dab0728",1120:"4f6f2aea",1128:"65ce1a56",1140:"00516883",1143:"4aa6b232",1171:"22b58cfd",1224:"e047a4aa",1237:"d347a96c",1257:"f3d175d1",1368:"c2a98948",1394:"d205670c",1405:"d65dd38b",1433:"3d44c209",1434:"40edff1c",1435:"a7d2d575",1477:"fdbd6fcf",1484:"a6301a44",1538:"a07a5df3",1579:"b46d5049",1655:"f58a3749",1678:"da94a1a2",1684:"15190a6e",1689:"f1e4ca63",1690:"6f99d9a7",1695:"773cf80b",1713:"76202190",1722:"9d24017b",1745:"49386147",1756:"504542ca",1793:"2db2c7f2",1840:"858dbec8",1857:"564089b8",1913:"af9a2108",1933:"9f9f4633",1947:"94fdf9d0",1955:"46a13fb4",1985:"3301c264",1988:"a61c95d1",2106:"e7fd625d",2112:"678faea1",2115:"282b0f22",2150:"552e629c",2157:"44e55818",2161:"d1c3f115",2165:"3a7fdbb2",2286:"641cef90",2308:"f587da2e",2313:"0518b035",2329:"2ea6f5de",2366:"adf72713",2416:"12db4505",2493:"3a75331c",2500:"88539fd0",2523:"0e7ae1f2",2529:"552f8bdf",2535:"43468b6f",2615:"a546b37b",2669:"e18c6896",2685:"22362746",2739:"83e9f664",2748:"30f9b15d",2818:"93e3f3be",2828:"1a0fd7be",2995:"ff09bfcf",3006:"d5ffdd2d",3018:"d3d18586",3057:"68d75a2e",3080:"01ba5041",3089:"63fb042a",3154:"e226b3ec",3189:"49975c09",3215:"1fd6a106",3228:"bbb01b48",3229:"c17da2b2",3231:"c09cb887",3233:"b4c4f1e7",3288:"00d9a459",3321:"e6727e42",3368:"d2126698",3369:"c8149385",3420:"8f65ca77",3492:"77dde2ae",3559:"861d1cd6",3608:"b5df34c4",3667:"b8beac18",3727:"1d5ad73
0",3840:"dd501e0c",3859:"fbebe9d2",3881:"f6526055",3918:"fd6cf91c",3919:"ac9fa7d0",3952:"dc17346c",4007:"369d07e6",4008:"44997440",4011:"6486f29c",4013:"3480387e",4034:"76766c1d",4060:"3ad386a4",4106:"e14f178f",4121:"d5df942b",4130:"2f955dfe",4193:"4bdae46c",4198:"36f45c29",4230:"0bbadb02",4272:"bf82ef27",4319:"b3c51855",4322:"b8638c03",4351:"940a7a93",4355:"b019cd60",4433:"a5175022",4446:"89d6669a",4447:"561c12fc",4457:"42d275cb",4472:"68994fb4",4475:"3263c914",4527:"c95de201",4528:"3fd74978",4535:"908bf7a5",4537:"f48159e4",4540:"fafa4397",4567:"e9375ffa",4571:"cc486c9b",4608:"0c6e4d41",4720:"e6fd5ebc",4758:"4e45b796",4805:"4d0de1df",4848:"0aa90d3a",4856:"bd6ff808",4885:"73e3e49a",4893:"cd033837",4896:"b7e1346b",4927:"7b00ca95",4934:"a8b8a645",4941:"bb692913",4948:"cb5d07f7",4972:"fcd66616",5032:"a97e2ad7",5057:"f3e747ba",5073:"c99d1438",5101:"91ea1946",5113:"9c0d55ef",5133:"11018c6f",5150:"ccf2ee94",5173:"fd0380d6",5182:"02cda888",5208:"856c8726",5210:"2ec59f11",5258:"1b02c088",5281:"3acaed66",5387:"fbc7080c",5416:"39d2c6e8",5451:"6a120073",5491:"edfc45f3",5536:"865c303a",5620:"5f24bdb0",5689:"89ee60ee",5761:"569d588c",5762:"46db3612",5870:"b410b0b7",5875:"21a38cc3",5881:"6c8ff055",5938:"4dc2a87d",5995:"3f0dd825",6020:"89dce336",6048:"341e3f6b",6088:"efa01055",6103:"6909e0f1",6208:"c536619f",6230:"cc140ea1",6235:"30d1f5ee",6237:"e505b091",6251:"0c5c366e",6318:"3705d074",6329:"25d32984",6336:"db937c1e",6338:"81e3d85f",6366:"c124a60e",6392:"878e8356",6398:"7ce9e498",6425:"b78ea8ec",6435:"9ef03e55",6443:"980dba48",6508:"ba631703",6612:"9b10c857",6618:"4b912f7d",6705:"c75edc81",6736:"1a84d8f4",6765:"22e6b1d2",6792:"f33527dc",6809:"56a01b31",6849:"0a89ad00",6862:"e2e0495a",6869:"b5364e1f",6881:"c2743313",6909:"f16e1b6e",6912:"37dfa78c",6966:"32f80aa9",7054:"cf2347e8",7181:"0659a7c8",7183:"74c7250f",7197:"626cced9",7199:"924d4be4",7208:"92ca1881",7220:"5c711f43",7346:"40065e23",7372:"60564700",7392:"0179230a",7400:"950813ce",7490:"4e93c0d9",7567:"e82440b5",7647:"64d03f6
3",7656:"6950c92b",7659:"297e84d6",7697:"280f4fc5",7749:"33bb4054",7796:"e02647c4",7813:"88beb2cf",7830:"180b0d92",7833:"9cb9fe64",7854:"48c391b2",7904:"24ede80b",7918:"df16294d",7968:"f58059ef",7983:"1c61b8de",7996:"021059d3",8018:"39e80a27",8097:"b68a870b",8114:"86cb582f",8158:"3a06108a",8242:"7efc152e",8244:"4b33707a",8271:"05841d70",8279:"4a118ab0",8309:"7482cfa6",8333:"cede7f8c",8337:"70e836a7",8357:"77417755",8419:"2a9654c3",8461:"030ce541",8472:"fc23dee2",8481:"c33e25af",8592:"6ebfe92f",8610:"14c8e2db",8619:"14f82cb1",8620:"644fbef9",8634:"45c89ee4",8685:"2fb3216a",8766:"14090488",8857:"553757d8",8876:"1e438729",8884:"dccb4039",8963:"aa069220",9009:"20ffe920",9015:"1743a961",9033:"d86494a3",9130:"66c14487",9160:"33c46af5",9205:"cdd130de",9221:"18815ffb",9247:"1f4e454d",9255:"7193711f",9322:"76531c3a",9324:"d2024120",9334:"8199a693",9340:"fa6012b1",9382:"67a981a0",9391:"d957d197",9404:"6760c531",9487:"ae9e3f4a",9514:"fc89e98a",9546:"f48dbc7f",9680:"4923e9c1",9687:"c9ff2d8b",9706:"9af06ac3",9716:"3bef1182",9726:"6539670d",9774:"16ab296c",9824:"ecab8fb0",9859:"e8730cf9",9874:"a43c45cc",9875:"e906463f",9878:"2eaa431b",9914:"524f1554",9927:"a50baa46",9945:"12b933b1"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,f)=>Object.prototype.hasOwnProperty.call(e,f),b={},d="starwhale-docs:",r.l=(e,f,a,c)=>{if(b[e])b[e].push(f);else{var t,o;if(void 0!==a)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var d=b[e];if(delete b[e],t.parentNode&&t.parentNode.removeChild(t),d&&d.forEach((e=>e(a))),f)return f(a)},s=setTimeout(l.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=l.bind(null,t.onerror),t.onload=l.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof 
Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/",r.gca=function(e){return e={17896441:"7918",19623007:"7400",27414590:"436",32528799:"3233",38311505:"4758",50417919:"7697",56383101:"4034",70926518:"8592",80151786:"7656",88015853:"4934","935f2afb":"53",b4266ab5:"70","3762e359":"74","93232c32":"75",cc78421f:"125",e6b210f1:"137",ae57ea02:"143","71ff360b":"168",ba836e7d:"190","56d53d53":"232","3480b943":"304",b5684a7b:"379","24a48cd0":"474",f972728b:"489","5eb6fda8":"503",ffff3183:"527",b2b675dd:"533","2729f289":"556",bc50734c:"705","023934a0":"758","207b7cff":"790","44b1cbe2":"838",f7950235:"846","2545d4b6":"904",bae51714:"953","9bdeab26":"988","39af834a":"996",fe6343fd:"1002","91ab3747":"1013","81c352c7":"1014",be6c2ff2:"1023",cbad36d9:"1061","07b3ca0b":"1094",eeb12725:"1120","0513b4b7":"1128","91edb5cf":"1140","689aaa3d":"1143",d9beab61:"1171",a56f2bca:"1224","2e5465c5":"1237","03431110":"1257","986a7b24":"1368","5a391425":"1394","49b4d3fe":"1405",c3542997:"1433","5f38f66e":"1434","7684512e":"1435",b2f554cd:"1477","25243afb":"1484","19af0c64":"1538","521740bf":"1579","899d5fe0":"1655","923434ee":"1678","93f00860":"1684",f6ac3114:"1689",e4b75637:"1690",ff74d3da:"1695",a7023ddc:"1713",fe1b78e1:"1722",d7efef2f:"1745","0661cc41":"1756","07e54361":"1793","6cbd7e7d":"1840",bd7d9199:"1857","373b159b":"1913","8195011d":"1933",fbde1876:"1947","2a436d5c":"1955",ef1be1e1:"1985","9f58059d":"1988","8f11fbb5":"2106",bde18961:"2112",a0a891b7:"2115","4f7fe039":"2150","0b0df7a2":"2157","90d098bd":"2161",e53d3ff9:"2165","35b14ade":"2286",e4f6b8e1:"2308",b879cbc2:"2313",b40c3376:"2329",d832a854:"2366","06309b2a":"2416","58f10d9f":"2493","4f907a97":"2500","2e0ef41a":"2523",a23fbffc:"2529","814f3328":"2535","116fecd1":"2615","963797ee":"2669","0c3559ad":"2685","61356d5d":"2739","3853ad19":"2748","8de92970":"2818","74d883b7":"2828",af0debe5:"2995","1b7cc7bc":"3006",d15112ec:"3018","9b79081a":
"3057",c49571eb:"3080",a6aa9e1f:"3089",e1105187:"3154","4511f06b":"3189","236d8693":"3215","481b4727":"3228","7f26efeb":"3229","1329fb4f":"3231","5648656a":"3288","0dbd89b2":"3321",a3eb7131:"3368","4f239cc1":"3369","00ebe6f9":"3420",f5b6cb08:"3492","0bd945fa":"3559","9e4087bc":"3608",f6fec203:"3667",f1e36233:"3727","73c5427a":"3840",bca0dfde:"3859","589c66ec":"3881",c592492b:"3918","5d3ff7ab":"3919","6edc6741":"3952","77daf463":"4007","1a1c0fb0":"4008","9b1574cb":"4011","01a85c17":"4013","2914bf67":"4060",bd9c0894:"4106","6b5d17d2":"4121",d34d6740:"4130","2572f700":"4193",f3650d5d:"4198","327c535f":"4230",a5adff03:"4272",b3c9b7e8:"4319",ddb3d303:"4322","7beeba1c":"4351","22ced7ff":"4355",d218f8f7:"4433","3b7875ca":"4446",d83ef4b5:"4447",fcfb8e31:"4457","3f90d064":"4472","4012ba53":"4475","8ba41740":"4527",f012d72b:"4528",e36a0948:"4535","9cf37abf":"4537","1cd68b1e":"4540","20dac1bd":"4567","5936e3f8":"4571","3818d7db":"4608","34f595d3":"4720","26df6cbd":"4805",d29f8d9b:"4848",bed23bc4:"4856","5f0d6fdb":"4885",d22055dc:"4893","7a8da0ce":"4896","6f99d302":"4927","1efab335":"4941",c3c8b115:"4948",d3fd6aa5:"5032","5c2ad240":"5057","63f3ccc1":"5073","959b44ee":"5101","18c9cfd4":"5113",ffe4100e:"5133",dbe33f09:"5150","7d733c18":"5173","4645ad56":"5182",a25d6fd7:"5208",ca3e8775:"5210",c0d50cc0:"5258","1cda5aa6":"5281",dcfe1bde:"5387",fb1f8cbb:"5416",b7557c51:"5451",d2453d90:"5491","5a4ad223":"5536","8e04f48d":"5620",c2728190:"5689",ab388152:"5761","877d4050":"5762",ac51e66e:"5870",c0fee9fd:"5875","78d62bd9":"5881","5bb31039":"5938",aa10845f:"5995","4db21eee":"6020","88988c18":"6088",ccc49370:"6103","486179e1":"6208",c886740e:"6230","57f5c722":"6235","9ba654c2":"6237","0430ce14":"6251","1609ca8c":"6318","54c82979":"6329","6763b9d9":"6336","4528a46e":"6338","6e1f8ce6":"6366",f6305a2a:"6392",aefeddaf:"6398",f051bb65:"6425","42d9f35f":"6435","65c6927d":"6443","797023eb":"6508",ec8a462b:"6612","2d40f4be":"6618","4888691f":"6705",afc2f83f:"6736",a1612d77:"6765","2ea1e391":"6792"
,"2defc614":"6809","4b44443b":"6849","3521e0c7":"6862","8fb4711f":"6869",cfd4e1da:"6881","8e3c9231":"6909","51c1bc08":"6912","552162b0":"6966","5431a54b":"7054",fa377e30:"7181","32cba7ce":"7183",e8d59815:"7197","7a93542f":"7199","5b72acc5":"7208",e7c33aac:"7220",f2cc7669:"7346","8b2d4da3":"7372",afaa6f85:"7392",fbf0a0a7:"7490",df36ecd4:"7567","1ddcdff5":"7647","83e43ff1":"7659","3160d5c7":"7749",e14c639a:"7796","377d34c2":"7813","568f204d":"7830","74882eab":"7833","40e2e448":"7854","206e8b40":"7904",ead21b0a:"7968",b251fb47:"7983",b4161e04:"7996","78886a16":"8018",f8292b17:"8097","41c3269f":"8114","1cd0502b":"8158","272c7b59":"8242","305f83c8":"8244","1c091541":"8271","9d1c829d":"8279",ac72f4d5:"8309",aa126475:"8333","208d09d7":"8337",fa7c6226:"8419",a32436d0:"8461","51f472b9":"8472","5e73aff3":"8481","6875c492":"8610","605a1123":"8619",e9fbe6ff:"8620","97affa74":"8634","6f13de77":"8685","4696e759":"8766","68ba87f2":"8857","99977c84":"8876",c757b298:"8884",f3f1a75b:"8963","7578b5f6":"9009",eb575f18:"9015","9dc553d4":"9033",f35e473a:"9130",feec69fc:"9160","7c8cfcaa":"9205","35a1304b":"9221",a6703bbf:"9247","2d78f039":"9255",f2534a3f:"9322",e2cfa70e:"9324","247783bb":"9334","74da7579":"9340","1daa9b51":"9382",cc120547:"9391","9f104ddb":"9404","7eb32d37":"9487","1be78505":"9514",e44ab7b1:"9546",d567a5f3:"9680","978f5c7d":"9687",cddb67a8:"9706","286cdff1":"9716","0ca68b49":"9726","43b1a21e":"9774",fe1659de:"9824","06024424":"9859",febe53b7:"9874","7d188f18":"9875","3b5b6856":"9878","29713cec":"9914","53e20daa":"9927","347c37ac":"9945"}[e]||e,r.p+r.u(e)},(()=>{var e={1303:0,532:0};r.f.j=(f,a)=>{var b=r.o(e,f)?e[f]:void 0;if(0!==b)if(b)a.push(b[2]);else if(/^(1303|532)$/.test(f))e[f]=0;else{var d=new Promise(((a,d)=>b=e[f]=[a,d]));a.push(b[2]=d);var c=r.p+r.u(f),t=new Error;r.l(c,(a=>{if(r.o(e,f)&&(0!==(b=e[f])&&(e[f]=void 0),b)){var d=a&&("load"===a.type?"missing":a.type),c=a&&a.target&&a.target.src;t.message="Loading chunk "+f+" failed.\n("+d+": 
"+c+")",t.name="ChunkLoadError",t.type=d,t.request=c,b[1](t)}}),"chunk-"+f,f)}},r.O.j=f=>0===e[f];var f=(f,a)=>{var b,d,c=a[0],t=a[1],o=a[2],n=0;if(c.some((f=>0!==e[f]))){for(b in t)r.o(t,b)&&(r.m[b]=t[b]);if(o)var i=o(r)}for(f&&f(a);n - + - + \ No newline at end of file diff --git a/blog/index.html b/blog/index.html index 935da07f6..9c8057b0b 100644 --- a/blog/index.html +++ b/blog/index.html @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) Choose the handler: Run the chatbot model, select the default option: evaluation:chatbot. 4) Choose the runtime: Select the default option, built-in. 5) Advanced configuration: Turn on the auto-release switch, where you can set the duration after which the task will be automatically canceled. If you don't set auto-release, you can manually cancel the task after the experiment is completed.

    Click on Submit to run the model.

    image

    4. View the Results and Logs

    The job list page allows you to view all the tasks in the project.

    image

    Click on the Job ID to enter the task details page, and then click on View Logs to see the logs.

    The total time taken from task submission to model execution is 5 minutes.

    image

    Once the execution is successful, return to the task list and click on the Terminal button to open the chatbox page. You can now start a conversation with Llama 2-Chat on the chatbox page.

    image

    image

    These are the instructions on how to use Starwhale Cloud to run Llama 2-Chat. If you encounter any issues during the process, please feel free to leave a private message. You can also visit the Starwhale official website for more information. Thank you for your attention and support.

    · One min read
    tianwei

    Starwhale is an MLOps platform that make your model creation, evaluation and publication much eaiser. It aims to create a handy tool for data scientists and machine learning engineers.

    - + \ No newline at end of file diff --git a/blog/intro-starwhale/index.html b/blog/intro-starwhale/index.html index 45523e9ef..fc280c734 100644 --- a/blog/intro-starwhale/index.html +++ b/blog/intro-starwhale/index.html @@ -10,13 +10,13 @@ - +

    What is Starwhale?

    · One min read
    tianwei

    Starwhale is an MLOps platform that make your model creation, evaluation and publication much eaiser. It aims to create a handy tool for data scientists and machine learning engineers.

    Starwhale helps you:

    • Keep track of your training/testing data history including data items and their labels, so that you can easily access them.
    • Manage your model packages that you can share across your team.
    • Run your models in different environments, either on a Nvidia GPU server or on an embedded device like Cherry Pi.
    • Create a online service with interactive Web UI for your models.

    Starwhale is designed to be an open platform. You can create your own plugins to meet your requirements.

    - + \ No newline at end of file diff --git a/blog/reproduce-and-compare-evals/index.html b/blog/reproduce-and-compare-evals/index.html index 9c4f9f056..50ec6867e 100644 --- a/blog/reproduce-and-compare-evals/index.html +++ b/blog/reproduce-and-compare-evals/index.html @@ -10,13 +10,13 @@ - +

    Reporduce and compare evaluations using Starwhale

    · 3 min read

    If you have doubts about the findings of this report or any other evaluations, how should you reproduce and compare the evaluation results?

    Workflow: Login → Create a project → Run the model → Create a report

    STEP1: Login

    First, you have to log in to the Starwhale platform by clicking on the login. If you haven't registered yet, you can click on the sign-up to create an account.

    STEP2: Create a project

    After successful login, you will be directed to the project list page. Click the Create button in the top right corner to create a new project. Enter the project name and click the Submit button to create the project.

    STEP3: Run the models

    Go to the Evaluations list pag, click the Create button, and then choose the parameters.

    For example, to reproduce the evaluation result of baichuan2-13b with the cmmlu dataset, refer to the following:

    1. Choose the running resource, recommend to select A10*24G*2;
    2. Select the model: Choose the models you want to reproduce, e.g.: starwhale/llm-leaderboard/baichuan2-13b/atgoiscm(v1、latest);
    3. Choose the handler: Select the option "src.evaluation:evaluation_results";
    4. Choose the dataset: Select the option "starwhale/llm-leaderboard/cmmlu/kiwtxza7(v1、latest)";
    5. Choose the runtime: Select the option "starwhale/llm-leaderboard/llm-leaderboard/ickinf6q(v1、latest)".
    6. Advanced configuration: Turn off the auto-release switch.

    Click Submit to run the model. During the evaluation process, you can click View Log on the task tab of the evaluation details page to understand the running status of the evaluation. When the evaluation status is "Successed," you can view the results on the list and details pages.

    STEP4: Compare the evaluation results

    To create a report, go to the Report list page and click the Create button in the upper right corner.

    Reports provide rich text editing capabilities, and here we mainly introduce how to compare your evaluation results with Starwhale or other evaluation results.

    1. Input the report title and description;
    2. Input /, select and click the Panel option;
    3. Click the Add Evaluation button, select the project, such as "llm-leaderboard", and then to check the evaluations you want to add. Click Add to add evaluations to the evaluation list. You can add multiple evaluations that you want to compare across different projects;
    4. After adding the evaluations, click the Column Management settings icon to set the columns in the evaluation list and their display order. When you hover over a column in the evaluation list, you can fix that column or sort it in ascending or descending order;
    5. You can click the Add Chart button and select the chart type, such as Bar Chart, then add Metrics related to accuracy (support for metric fuzzy search). Input a chart title (optional) and click Submit to display the data in bar chart format for intuitive analysis.
    6. Click Publish to Project button to publish the report;
    7. If you want to share the report with others, go to the Report list page, turn on the "Share" switch, and people who obtain the report link can view it.

    reproduce-and-compare-evals.gif

    These are the instructions on how to reproduce and compare evaluations using Starwhale. Please leave a private message if you encounter any issues during the using process. You can also visit the Starwhale official website for more information. Thank you for your attention and support.

    - + \ No newline at end of file diff --git a/blog/run-llama2-chat-in-five-minutes/index.html b/blog/run-llama2-chat-in-five-minutes/index.html index e72d25bf9..e0838d5a6 100644 --- a/blog/run-llama2-chat-in-five-minutes/index.html +++ b/blog/run-llama2-chat-in-five-minutes/index.html @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) Choose the handler: Run the chatbot model, select the default option: evaluation:chatbot. 4) Choose the runtime: Select the default option, built-in. 5) Advanced configuration: Turn on the auto-release switch, where you can set the duration after which the task will be automatically canceled. If you don't set auto-release, you can manually cancel the task after the experiment is completed.

    Click on Submit to run the model.

    image

    4. View the Results and Logs

    The job list page allows you to view all the tasks in the project.

    image

    Click on the Job ID to enter the task details page, and then click on View Logs to see the logs.

    The total time taken from task submission to model execution is 5 minutes.

    image

    Once the execution is successful, return to the task list and click on the Terminal button to open the chatbox page. You can now start a conversation with Llama 2-Chat on the chatbox page.

    image

    image

    These are the instructions on how to use Starwhale Cloud to run Llama 2-Chat. If you encounter any issues during the process, please feel free to leave a private message. You can also visit the Starwhale official website for more information. Thank you for your attention and support.

    - + \ No newline at end of file diff --git a/blog/tags/index.html b/blog/tags/index.html index 863f89fd7..03d6830ce 100644 --- a/blog/tags/index.html +++ b/blog/tags/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/blog/tags/intro/index.html b/blog/tags/intro/index.html index a8c4010c8..b9ee2c295 100644 --- a/blog/tags/intro/index.html +++ b/blog/tags/intro/index.html @@ -10,13 +10,13 @@ - +

    One post tagged with "intro"

    View All Tags

    · One min read
    tianwei

    Starwhale is an MLOps platform that make your model creation, evaluation and publication much eaiser. It aims to create a handy tool for data scientists and machine learning engineers.

    - + \ No newline at end of file diff --git a/blog/tags/llama-2/index.html b/blog/tags/llama-2/index.html index 6e771ec3a..db518d60b 100644 --- a/blog/tags/llama-2/index.html +++ b/blog/tags/llama-2/index.html @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) Choose the handler: Run the chatbot model, select the default option: evaluation:chatbot. 4) Choose the runtime: Select the default option, built-in. 5) Advanced configuration: Turn on the auto-release switch, where you can set the duration after which the task will be automatically canceled. If you don't set auto-release, you can manually cancel the task after the experiment is completed.

    Click on Submit to run the model.

    image

    4. View the Results and Logs

    The job list page allows you to view all the tasks in the project.

    image

    Click on the Job ID to enter the task details page, and then click on View Logs to see the logs.

    The total time taken from task submission to model execution is 5 minutes.

    image

    Once the execution is successful, return to the task list and click on the Terminal button to open the chatbox page. You can now start a conversation with Llama 2-Chat on the chatbox page.

    image

    image

    These are the instructions on how to use Starwhale Cloud to run Llama 2-Chat. If you encounter any issues during the process, please feel free to leave a private message. You can also visit the Starwhale official website for more information. Thank you for your attention and support.

    - + \ No newline at end of file diff --git a/blog/tags/model-evaluaitons/index.html b/blog/tags/model-evaluaitons/index.html index 09ef63afc..fcf4e7807 100644 --- a/blog/tags/model-evaluaitons/index.html +++ b/blog/tags/model-evaluaitons/index.html @@ -10,13 +10,13 @@ - +

    One post tagged with "Model Evaluaitons"

    View All Tags

    · 3 min read

    If you have doubts about the findings of this report or any other evaluations, how should you reproduce and compare the evaluation results?

    Workflow: Login → Create a project → Run the model → Create a report

    STEP1: Login

    First, you have to log in to the Starwhale platform by clicking on the login. If you haven't registered yet, you can click on the sign-up to create an account.

    STEP2: Create a project

    After successful login, you will be directed to the project list page. Click the Create button in the top right corner to create a new project. Enter the project name and click the Submit button to create the project.

    STEP3: Run the models

    Go to the Evaluations list pag, click the Create button, and then choose the parameters.

    For example, to reproduce the evaluation result of baichuan2-13b with the cmmlu dataset, refer to the following:

    1. Choose the running resource, recommend to select A10*24G*2;
    2. Select the model: Choose the models you want to reproduce, e.g.: starwhale/llm-leaderboard/baichuan2-13b/atgoiscm(v1、latest);
    3. Choose the handler: Select the option "src.evaluation:evaluation_results";
    4. Choose the dataset: Select the option "starwhale/llm-leaderboard/cmmlu/kiwtxza7(v1、latest)";
    5. Choose the runtime: Select the option "starwhale/llm-leaderboard/llm-leaderboard/ickinf6q(v1、latest)".
    6. Advanced configuration: Turn off the auto-release switch.

    Click Submit to run the model. During the evaluation process, you can click View Log on the task tab of the evaluation details page to understand the running status of the evaluation. When the evaluation status is "Successed," you can view the results on the list and details pages.

    STEP4: Compare the evaluation results

    To create a report, go to the Report list page and click the Create button in the upper right corner.

    Reports provide rich text editing capabilities, and here we mainly introduce how to compare your evaluation results with Starwhale or other evaluation results.

    1. Input the report title and description;
    2. Input /, select and click the Panel option;
    3. Click the Add Evaluation button, select the project, such as "llm-leaderboard", and then to check the evaluations you want to add. Click Add to add evaluations to the evaluation list. You can add multiple evaluations that you want to compare across different projects;
    4. After adding the evaluations, click the Column Management settings icon to set the columns in the evaluation list and their display order. When you hover over a column in the evaluation list, you can fix that column or sort it in ascending or descending order;
    5. You can click the Add Chart button and select the chart type, such as Bar Chart, then add Metrics related to accuracy (support for metric fuzzy search). Input a chart title (optional) and click Submit to display the data in bar chart format for intuitive analysis.
    6. Click Publish to Project button to publish the report;
    7. If you want to share the report with others, go to the Report list page, turn on the "Share" switch, and people who obtain the report link can view it.

    reproduce-and-compare-evals.gif

    These are the instructions on how to reproduce and compare evaluations using Starwhale. Please leave a private message if you encounter any issues during the using process. You can also visit the Starwhale official website for more information. Thank you for your attention and support.

    - + \ No newline at end of file diff --git a/blog/tags/model-package/index.html b/blog/tags/model-package/index.html index 696702f87..6bf3d188a 100644 --- a/blog/tags/model-package/index.html +++ b/blog/tags/model-package/index.html @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) Choose the handler: Run the chatbot model, select the default option: evaluation:chatbot. 4) Choose the runtime: Select the default option, built-in. 5) Advanced configuration: Turn on the auto-release switch, where you can set the duration after which the task will be automatically canceled. If you don't set auto-release, you can manually cancel the task after the experiment is completed.

    Click on Submit to run the model.

    image

    4. View the Results and Logs

    The job list page allows you to view all the tasks in the project.

    image

    Click on the Job ID to enter the task details page, and then click on View Logs to see the logs.

    The total time taken from task submission to model execution is 5 minutes.

    image

    Once the execution is successful, return to the task list and click on the Terminal button to open the chatbox page. You can now start a conversation with Llama 2-Chat on the chatbox page.

    image

    image

    These are the instructions on how to use Starwhale Cloud to run Llama 2-Chat. If you encounter any issues during the process, please feel free to leave a private message. You can also visit the Starwhale official website for more information. Thank you for your attention and support.

    - + \ No newline at end of file diff --git a/cloud/billing/bills/index.html b/cloud/billing/bills/index.html index 94c445e0b..4410abf45 100644 --- a/cloud/billing/bills/index.html +++ b/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/cloud/billing/index.html b/cloud/billing/index.html index 1069dcde4..b4345ab8f 100644 --- a/cloud/billing/index.html +++ b/cloud/billing/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/cloud/billing/recharge/index.html b/cloud/billing/recharge/index.html index f3df5a4ba..1034a5311 100644 --- a/cloud/billing/recharge/index.html +++ b/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/cloud/billing/refund/index.html b/cloud/billing/refund/index.html index 564c27162..632c7c922 100644 --- a/cloud/billing/refund/index.html +++ b/cloud/billing/refund/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/cloud/billing/voucher/index.html b/cloud/billing/voucher/index.html index 5cbcee102..b568ca370 100644 --- a/cloud/billing/voucher/index.html +++ b/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/cloud/index.html b/cloud/index.html index 9b55c2cc7..7517b8e7d 100644 --- a/cloud/index.html +++ b/cloud/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/community/contribute/index.html b/community/contribute/index.html index c2fd9fa46..e1ef9ca63 100644 --- a/community/contribute/index.html +++ b/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Contribute to Starwhale

    Getting Involved/Contributing

    We welcome and encourage all contributions to Starwhale, including and not limited to:

    • Describe the problems encountered during use.
    • Submit feature request.
    • Discuss in Slack and Github Issues.
    • Code Review.
    • Improve docs, tutorials and examples.
    • Fix Bug.
    • Add Test Case.
    • Code readability and code comments to import readability.
    • Develop new features.
    • Write enhancement proposal.

    You can get involved, get updates and contact Starwhale developers in the following ways:

    Starwhale Resources

    Code Structure

    • client: swcli and Python SDK with Pure Python3, which includes all Standalone Instance features.
      • api: Python SDK.
      • cli: Command Line Interface entrypoint.
      • base: Python base abstract.
      • core: Starwhale core concepts which includes Dataset,Model,Runtime,Project, job and Evaluation, etc.
      • utils: Python utilities lib.
    • console: frontend with React + TypeScript.
    • server:Starwhale Controller with java, which includes all Starwhale Cloud Instance backend apis.
    • docker:Helm Charts, dockerfile.
    • docs:Starwhale官方文档。
    • example:Example code.
    • scripts:Bash and Python scripts for E2E testing and software releases, etc.

    Fork and clone the repository

    You will need to fork the code of Starwhale repository and clone it to your local machine.

    • Fork Starwhale repository: Fork Starwhale Github Repo,For more usage details, please refer to: Fork a repo

    • Install Git-LFS:Git-LFS

       git lfs install
    • Clone code to local machine

      git clone https://github.com/${your username}/starwhale.git

    Development environment for Standalone Instance

    Standalone Instance is written in Python3. When you want to modify swcli and sdk, you need to build the development environment.

    Standalone development environment prerequisites

    • OS: Linux or macOS
    • Python: 3.7~3.11
    • Docker: >=19.03(optional)
    • Python isolated env tools:Python venv, virtualenv or conda, etc

    Building from source code

    Based on the previous step, clone to the local directory: starwhale, and enter the client subdirectory:

    cd starwhale/client

    Create an isolated python environment with conda:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    Install client package and python dependencies into the starwhale-dev environment:

    make install-sw
    make install-dev-req

    Validate with the swcli --version command. In the development environment, the version is 0.0.0.dev0:

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ swcli --version
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    Modifying the code

    When you modify the code, you need not to install python package(run make install-sw command) again. .editorconfig will be imported into the most IDE and code editors which helps maintain consistent coding styles for multiple developers.

    Lint and Test

    Run unit test, E2E test, mypy lint, flake lint and isort check in the starwhale directory.

    make client-all-check

    Development environment for Cloud Instance

    Cloud Instance is written in Java(backend) and React+TypeScript(frontend).

    Development environment for Console

    Development environment for Server

    • Language: Java
    • Build tool: Maven
    • Development framework: Spring Boot+Mybatis
    • Unit test framework:Junit5
      • Mockito used for mocking
      • Hamcrest used for assertion
      • Testcontainers used for providing lightweight, throwaway instances of common databases, Selenium web browsers that can run in a Docker container.
    • Check style tool:use maven-checkstyle-plugin

    Server development environment prerequisites

    • OS: Linux, macOS or Windows
    • Docker: >=19.03
    • JDK: >=11
    • Maven: >=3.8.1
    • Mysql: >=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(If you don't have a k8s cluster, you can use Minikube as an alternative for development and debugging)

    Modify the code and add unit tests

    Now you can enter the corresponding module to modify and adjust the code on the server side. The main business code directory is src/main/java, and the unit test directory is src/test/java.

    Execute code check and run unit tests

    cd starwhale/server
    mvn clean test

    Deploy the server at local machine

    • Dependent services that need to be deployed

      • Minikube (Optional. Minikube can be used when there is no k8s cluster; see the installation doc: Minikube)

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • Package server program

      If you need to deploy the front-end at the same time when deploying the server, you can execute the build command of the front-end part first, and then execute 'mvn clean package', and the compiled front-end files will be automatically packaged.

      Use the following command to package the program

      cd starwhale/server
      mvn clean package
    • Specify the environment required for server startup

      # Minio env
      export SW_STORAGE_ENDPOINT=http://${Minio IP,default is:127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,default is:starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,default is:starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,default is:starwhale}
      export SW_STORAGE_REGION=${Minio region,default is:local}
      # kubernetes env
      export KUBECONFIG=${the '.kube' file path}/.kube/config

      export SW_INSTANCE_URI=http://${Server IP}:8082
      export SW_METADATA_STORAGE_IP=${Mysql IP,default: 127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,default: 3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,default: starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,default: starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,default: starwhale}
    • Deploy server service

      You can use the IDE or the command to deploy.

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • Debug

      There are two ways to debug the modified function:

      • Use swagger-ui for interface debugging, visit /swagger-ui/index.html to find the corresponding api
      • Debug the corresponding function directly in the ui (provided that the front-end code has been built in advance according to the instructions when packaging)
    - + \ No newline at end of file diff --git a/concepts/index.html b/concepts/index.html index ed94fb517..d493fb084 100644 --- a/concepts/index.html +++ b/concepts/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/concepts/names/index.html b/concepts/names/index.html index 0a234b463..c8b1dbd93 100644 --- a/concepts/names/index.html +++ b/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Names in Starwhale

    Names mean project names, model names, dataset names, runtime names, and tag names.

    Names Limitation

    • Names are case-insensitive.
    • A name MUST only consist of letters A-Z a-z, digits 0-9, the hyphen character -, the dot character ., and the underscore character _.
    • A name should always start with a letter or the _ character.
    • The maximum length of a name is 80.

    Names uniqueness requirement

    • The resource name should be a unique string within its owner. For example, the project name should be unique in the owner instance, and the model name should be unique in the owner project.
    • The resource name can not be used by any other resource of the same kind in their owner, including those removed ones. For example, Project "apple" can not have two models named "Alice", even if one of them is already removed.
    • Different kinds of resources can have the same name. For example, a project and a model can be called "Alice" simultaneously.
    • Resources with different owners can have the same name. For example, a model in project "Apple" and a model in project "Banana" can have the same name "Alice".
    • Garbage-collected resources' names can be reused. For example, after the model with the name "Alice" in project "Apple" is removed and garbage collected, the project can have a new model with the same name "Alice".
    - + \ No newline at end of file diff --git a/concepts/project/index.html b/concepts/project/index.html index 455cb4c6b..892564716 100644 --- a/concepts/project/index.html +++ b/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Project in Starwhale

    "Project" is the basic unit for organizing different resources like models, datasets, etc. You may use projects for different purposes. For example, you can create a project for a data scientist team, a product line, or a specific model. Users usually work on one or more projects in their daily lives.

    Starwhale Server/Cloud projects are grouped by accounts. Starwhale Standalone does not have accounts. So you will not see any account name prefix in Starwhale Standalone projects. Starwhale Server/Cloud projects can be either "public" or "private". A public project means that all users on the same instance are assigned a "guest" role to the project by default. For more information about roles, see Roles and permissions in Starwhale.

    A self project is created automatically and configured as the default project in Starwhale Standalone.

    - + \ No newline at end of file diff --git a/concepts/roles-permissions/index.html b/concepts/roles-permissions/index.html index 18452375e..3d081bde7 100644 --- a/concepts/roles-permissions/index.html +++ b/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Roles and permissions in Starwhale

    Roles are used to assign permissions to users. Only Starwhale Server/Cloud has roles and permissions, and Starwhale Standalone does not. The Administrator role is automatically created and assigned to the user "admin". Some sensitive operations can only be performed by users with the Administrator role, for example, creating accounts in Starwhale Server.

    Projects have three roles:

    • Admin - Project administrators can read and write project data and assign project roles to users.
    • Maintainer - Project maintainers can read and write project data.
    • Guest - Project guests can only read project data.
    ActionAdminMaintainerGuest
    Manage project membersYes
    Edit projectYesYes
    View projectYesYesYes
    Create evaluationsYesYes
    Remove evaluationsYesYes
    View evaluationsYesYesYes
    Create datasetsYesYes
    Update datasetsYesYes
    Remove datasetsYesYes
    View datasetsYesYesYes
    Create modelsYesYes
    Update modelsYesYes
    Remove modelsYesYes
    View modelsYesYesYes
    Create runtimesYesYes
    Update runtimesYesYes
    Remove runtimesYesYes
    View runtimesYesYesYes

    The user who creates a project becomes the first project administrator. They can assign roles to other users later.

    - + \ No newline at end of file diff --git a/concepts/versioning/index.html b/concepts/versioning/index.html index 525093f72..055cd5a6e 100644 --- a/concepts/versioning/index.html +++ b/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Resource versioning in Starwhale

    • Starwhale manages the history of all models, datasets, and runtimes. Every update to a specific resource appends a new version of the history.
    • Versions are identified by a version id which is a random string generated automatically by Starwhale and are ordered by their creation time.
    • Versions can have tags. Starwhale uses version tags to provide a human-friendly representation of versions. By default, Starwhale attaches a default tag to each version. The default tag is the letter "v", followed by a number. For each versioned resource, the first version tag is always tagged with "v0", the second version is tagged with "v1", and so on. And there is a special tag "latest" that always points to the last version. When a version is removed, its default tag will not be reused. For example, there is a model with tags "v0, v1, v2". When "v2" is removed, tags will be "v0, v1". And the following tag will be "v3" instead of "v2" again. You can attach your own tags to any version and remove them at any time.
    • Starwhale uses a linear history model. There is neither branch nor cycle in history.
    • History can not be rolled back. When a version is to be reverted, Starwhale clones the version and appends it as a new version to the end of the history. Versions in history can be manually removed and recovered.
    - + \ No newline at end of file diff --git a/dataset/index.html b/dataset/index.html index a364f1f1a..3714424f1 100644 --- a/dataset/index.html +++ b/dataset/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Dataset User Guide

    Overview

    Design Overview

    Starwhale Dataset Positioning

    The Starwhale Dataset contains three core stages: data construction, data loading, and data visualization. It is a data management tool for the ML/DL field. Starwhale Dataset can directly use the environment built by Starwhale Runtime, and can be seamlessly integrated with Starwhale Model and Starwhale Evaluation. It is an important part of the Starwhale MLOps toolchain.

    According to the classification of MLOps Roles in Machine Learning Operations (MLOps): Overview, Definition, and Architecture, the three stages of Starwhale Dataset target the following user groups:

    • Data construction: Data Engineer, Data Scientist
    • Data loading: Data Scientist, ML Developer
    • Data visualization: Data Engineer, Data Scientist, ML Developer

    mlops-users

    Core Functions

    • Efficient loading: The original dataset files are stored in external storage such as OSS or NAS, and are loaded on demand without having to save to disk.
    • Simple construction: Supports one-click dataset construction from Image/Video/Audio directories, json files and Huggingface datasets, and also supports writing Python code to build completely custom datasets.
    • Versioning: Can perform version tracking, data append and other operations, and avoid duplicate data storage through the internally abstracted ObjectStore.
    • Sharing: Implement bidirectional dataset sharing between Standalone instances and Cloud/Server instances through the swcli dataset copy command.
    • Visualization: The web interface of Cloud/Server instances can present multi-dimensional, multi-type data visualization of datasets.
    • Artifact storage: The Standalone instance can store locally built or distributed swds series files, while the Cloud/Server instance uses object storage to provide centralized swds artifact storage.
    • Seamless Starwhale integration: Starwhale Dataset can use the runtime environment built by Starwhale Runtime to build datasets. Starwhale Evaluation and Starwhale Model can directly specify the dataset through the --dataset parameter to complete automatic data loading, which facilitates inference, model evaluation and other environments.

    Key Elements

    • swds virtual package file: swds is different from swmp and swrt. It is not a single packaged file, but a virtual concept that specifically refers to a directory that contains dataset-related files for a version of the Starwhale dataset, including _manifest.yaml, dataset.yaml, dataset build Python scripts, and data file links, etc. You can use the swcli dataset info command to view where the swds is located. swds is the abbreviation of Starwhale Dataset.

    swds-tree.png

    • swcli dataset command line: A set of dataset-related commands, including construction, distribution and management functions. See CLI Reference for details.
    • dataset.yaml configuration file: Describes the dataset construction process. It can be completely omitted and specified through swcli dataset build parameters. dataset.yaml can be considered as a configuration file representation of the swcli dataset build command line parameters. swcli dataset build parameters take precedence over dataset.yaml.
    • Dataset Python SDK: Includes data construction, data loading, and several predefined data types. See Python SDK for details.
    • Python scripts for dataset construction: A series of scripts written using the Starwhale Python SDK to build datasets.

    Best Practices

    The construction of Starwhale Dataset is performed independently. If third-party libraries need to be introduced when writing construction scripts, using Starwhale Runtime can simplify Python dependency management and ensure reproducible dataset construction. The Starwhale platform will build in as many open source datasets as possible for users to copy datasets for immediate use.

    Command Line Grouping

    The Starwhale Dataset command line can be divided into the following stages from the perspective of usage phases:

    • Construction phase
      • swcli dataset build
    • Visualization phase
      • swcli dataset diff
      • swcli dataset head
    • Distribution phase
      • swcli dataset copy
    • Basic management
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    Currently, the Web UI in the Cloud/Server instance can visually display the dataset. Only the DataTypes from the Python SDK can be correctly interpreted by the frontend, with mappings as follows:

    • Image: Display thumbnails, enlarged images, MASK type images, support image/png, image/jpeg, image/webp, image/svg+xml, image/gif, image/apng, image/avif formats.
    • Audio: Displayed as an audio wave graph, playable, supports audio/mp3 and audio/wav formats.
    • Video: Displayed as a video, playable, supports video/mp4, video/avi and video/webm formats.
    • GrayscaleImage: Display grayscale images, support x/grayscale format.
    • Text: Display text, support text/plain format, set encoding format, default is utf-8.
    • Binary and Bytes: Not supported for display currently.
    • Link: The above multimedia types all support specifying links as storage paths.

    Starwhale Dataset Data Format

    The dataset consists of multiple rows, each row being a sample, each sample containing several features. The features have a dict-like structure with some simple restrictions [L]:

    • The dict keys must be str type.
    • The dict values must be Python basic types like int/float/bool/str/bytes/dict/list/tuple, or Starwhale built-in data types.
    • For the same key across different samples, the value types do not need to stay the same.
    • If the value is a list or tuple, the element data types must be consistent.
    • For dict values, the restrictions are the same as [L].

    Example:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    File Data Handling

    Starwhale Dataset handles file type data in a special way. You can ignore this section if you don't care about Starwhale's implementation.

    According to actual usage scenarios, Starwhale Dataset has two ways of handling file class data that is based on the base class starwhale.BaseArtifact:

    • swds-bin: Starwhale merges the data into several large files in its own binary format (swds-bin), which can efficiently perform indexing, slicing and loading.
    • remote-link: If the user's original data is stored in some external storage such as OSS or NAS, with a lot of original data that is inconvenient to move or has already been encapsulated by some internal dataset implementation, then you only need to use links in the data to establish indexes.

    In the same Starwhale dataset, two types of data can be included simultaneously.

    - + \ No newline at end of file diff --git a/dataset/yaml/index.html b/dataset/yaml/index.html index b7ea96f62..2a42b3881 100644 --- a/dataset/yaml/index.html +++ b/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    The dataset.yaml Specification

    tip

    dataset.yaml is optional for the swcli dataset build command.

    Building Starwhale Dataset uses dataset.yaml. Omitting dataset.yaml allows describing related configurations in swcli dataset build command line parameters. dataset.yaml can be considered as a file-based representation of the build command line configuration.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale DatasetYesString
    handlerImportable address of a class that inherits starwhale.SWDSBinBuildExecutor, starwhale.UserRawBuildExecutor or starwhale.BuildExecutor, or a function that returns a Generator or iterable object. Format is {module path}:{class name\|function name}YesString
    descDataset descriptionNoString""
    versiondataset.yaml format version, currently only "1.0" is supportedNoString1.0
    attrDataset build parametersNoDict
    attr.volume_sizeSize of each data file in the swds-bin dataset. Can be a number in bytes, or a number plus unit like 64M, 1GB etc.NoInt or Str64MB
    attr.alignment_sizeData alignment size of each data block in the swds-bin dataset. If set to 4k, and a data block is 7.9K, 0.1K padding will be added to make the block size a multiple of alignment_size, improving page size and read efficiency.NoInteger or String128

    Examples

    Simplest Example

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    The helloworld dataset uses the ExampleProcessExecutor class in dataset.py (located in the same directory as dataset.yaml) to build data.

    MNIST Dataset Build Example

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor
    desc: MNIST data and label test dataset
    attr:
    alignment_size: 128
    volume_size: 4M

    Example with handler as a generator function

    dataset.yaml contents:

    name: helloworld
    handler: dataset:iter_item

    dataset.py contents:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}
    - + \ No newline at end of file diff --git a/evaluation/heterogeneous/node-able/index.html b/evaluation/heterogeneous/node-able/index.html index 17746e10b..b27ef5cba 100644 --- a/evaluation/heterogeneous/node-able/index.html +++ b/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -23,7 +23,7 @@ Refer to the link.

    Take v0.13.0-rc.1 as an example:

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    Note: This operation will run the NVIDIA device plugin plugin on all Kubernetes nodes. If configured before, it will be updated. Please evaluate the image version used carefully.

  • Confirm GPU can be discovered and used in the cluster. Refer to the command below. Check that nvidia.com/gpu is in the Capacity of the Jetson node. The GPU is then recognized normally by the Kubernetes cluster.

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • Build and Use Custom Images

    The l4t-jetpack image mentioned earlier can meet our general use. If we need to customize a more streamlined image or one with more features, we can make it based on l4t-base. Relevant Dockerfiles can refer to the image Starwhale made for mnist.

    - + \ No newline at end of file diff --git a/evaluation/heterogeneous/virtual-node/index.html b/evaluation/heterogeneous/virtual-node/index.html index 950c1ceae..828e1402a 100644 --- a/evaluation/heterogeneous/virtual-node/index.html +++ b/evaluation/heterogeneous/virtual-node/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Virtual Kubelet as Kubernetes nodes

    Introduction

    Virtual Kubelet is an open source framework that can simulate a K8s node by mimicking the communication between kubelet and the K8s cluster.

    This solution is widely used by major cloud vendors for serverless container cluster solutions, such as Alibaba Cloud's ASK, Amazon's AWS Fargate, etc.

    Principles

    The virtual kubelet framework implements the related interfaces of kubelet for Node. With simple configuration, it can simulate a node.

    We only need to implement the PodLifecycleHandler interface to support:

    • Create, update, delete Pod
    • Get Pod status
    • Get Container logs

    Adding Devices to the Cluster

    If our device cannot serve as a K8s node due to resource constraints or other situations, we can manage these devices by using virtual kubelet to simulate a proxy node.

    The control flow between Starwhale Controller and the device is as follows:


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    Virtual kubelet converts the Pod orchestration information sent by Starwhale Controller into control behaviors for the device, such as executing a command via ssh on the device, or sending a message via USB or serial port.

    Below is an example of using virtual kubelet to control a device not joined to the cluster that is SSH-enabled:

    1. Prepare certificates
    • Create file csr.conf with the following content:
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name

    [req_distinguished_name]

    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names

    [alt_names]
    IP = 1.2.3.4
    • Generate the certificate:
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • Submit the certificate:
    cat vklet.csr | base64 | tr -d "\n" # output as content of spec.request in csr.yaml

    csr.yaml:

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
    kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    Now we have vklet-cert.pem.

    • Compile virtual kubelet:
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    Create the node configuration file mock.json:

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    Start virtual kubelet:

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig
    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    Now we have simulated a node with 100 cores + 100GB memory using virtual kubelet.

    • Add PodLifecycleHandler implementation to convert important information in Pod orchestration into ssh command execution, and collect logs for Starwhale Controller to collect.

    See ssh executor for a concrete implementation.

    - + \ No newline at end of file diff --git a/evaluation/index.html b/evaluation/index.html index 40db9ffb8..60400bbb7 100644 --- a/evaluation/index.html +++ b/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Model Evaluation

    Design Overview

    Starwhale Evaluation Positioning

    The goal of Starwhale Evaluation is to provide end-to-end management for model evaluation, including creating Jobs, distributing Tasks, viewing model evaluation reports and basic management. Starwhale Evaluation is a specific application of Starwhale Model, Starwhale Dataset, and Starwhale Runtime in the model evaluation scenario. Starwhale Evaluation is part of the MLOps toolchain built by Starwhale. More applications like Starwhale Model Serving, Starwhale Training will be included in the future.

    Core Features

    • Visualization: Both swcli and the Web UI provide visualization of model evaluation results, supporting comparison of multiple results. Users can also customize logging of intermediate processes.

    • Multi-scenario Adaptation: Whether it's a notebook, desktop or distributed cluster environment, the same commands, Python scripts, artifacts and operations can be used for model evaluation. This satisfies different computational power and data volume requirements.

    • Seamless Starwhale Integration: Leverage Starwhale Runtime for the runtime environment, Starwhale Dataset as data input, and run models from Starwhale Model. Configuration is simple whether using swcli, Python SDK or Cloud/Server instance Web UI.

    Key Elements

    • swcli model run: Command line for bulk offline model evaluation.
    • swcli model serve: Command line for online model evaluation.

    Best Practices

    Command Line Grouping

    From the perspective of completing an end-to-end Starwhale Evaluation workflow, commands can be grouped as:

    • Preparation Stage
      • swcli dataset build or Starwhale Dataset Python SDK
      • swcli model build or Starwhale Model Python SDK
      • swcli runtime build
    • Evaluation Stage
      • swcli model run
      • swcli model serve
    • Results Stage
      • swcli job info
    • Basic Management
      • swcli job list
      • swcli job remove
      • swcli job recover

    Abstraction job-step-task

    • job: A model evaluation task is a job, which contains one or more steps.

    • step: A step corresponds to a stage in the evaluation process. With the default PipelineHandler, steps are predict and evaluate. For custom evaluation processes using @handler, @evaluation.predict, @evaluation.evaluate decorators, steps are the decorated functions. Steps can have dependencies, forming a DAG. A step contains one or more tasks. Tasks in the same step have the same logic but different inputs. A common approach is to split the dataset into multiple parts, with each part passed to a task. Tasks can run in parallel.

    • task: A task is the final running entity. In Cloud/Server instances, a task is a container in a Pod. In Standalone instances, a task is a Python Thread.

    The job-step-task abstraction is the basis for implementing distributed runs in Starwhale Evaluation.

    - + \ No newline at end of file diff --git a/faq/index.html b/faq/index.html index c354c550f..2c7f3b02c 100644 --- a/faq/index.html +++ b/faq/index.html @@ -10,13 +10,13 @@ - +
    - + \ No newline at end of file diff --git a/getting-started/cloud/index.html b/getting-started/cloud/index.html index 0fce05dd3..fd46b8202 100644 --- a/getting-started/cloud/index.html +++ b/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Getting started with Starwhale Cloud

    Starwhale Cloud is hosted on Aliyun with the domain name https://cloud.starwhale.cn. In the future, we will launch the service on AWS with the domain name https://cloud.starwhale.ai. It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.

    You need to install the Starwhale Client (swcli) at first.

    Sign Up for Starwhale Cloud and create your first project

    You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account.

    Then you can create a new project. In this tutorial, we will use the name demo for the project name.

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Login to the cloud instance

    swcli instance login --username <your account name> --password <your password> --alias swcloud https://cloud.starwhale.cn

    Copy the dataset, model, and runtime to the cloud instance

    swcli model copy mnist swcloud/project/<your account name>:demo
    swcli dataset copy mnist swcloud/project/<your account name>:demo
    swcli runtime copy pytorch swcloud/project/<your account name>:demo

    Run an evaluation with the web UI

    console-create-job.gif

    Congratulations! You have completed the Starwhale Cloud Getting Started Guide.

    - + \ No newline at end of file diff --git a/getting-started/index.html b/getting-started/index.html index 0f1b01304..da1e70c3c 100644 --- a/getting-started/index.html +++ b/getting-started/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Getting started

    First, you need to install the Starwhale Client (swcli), which can be done by running the following command:

    python3 -m pip install starwhale

    For more information, see the swcli installation guide.

    Depending on your instance type, there are three getting-started guides available for you:

    • Getting started with Starwhale Standalone - This guide helps you run an MNIST evaluation on your desktop PC/laptop. It is the fastest and simplest way to get started with Starwhale.
    • Getting started with Starwhale Server - This guide helps you install Starwhale Server in your private data center and run an MNIST evaluation. At the end of the tutorial, you will have a Starwhale Server instance where you can run model evaluations on and manage your datasets and models.
    • Getting started with Starwhale Cloud - This guide helps you create an account on Starwhale Cloud and run an MNIST evaluation. It is the easiest way to experience all Starwhale features.
    - + \ No newline at end of file diff --git a/getting-started/runtime/index.html b/getting-started/runtime/index.html index 283f1ef16..8a60de3a8 100644 --- a/getting-started/runtime/index.html +++ b/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Getting Started with Starwhale Runtime

    This article demonstrates how to build a Starwhale Runtime of the PyTorch environment and how to use it. This runtime can meet the dependency requirements of the six examples in Starwhale: mnist, speech commands, nmt, cifar10, ag_news, and PennFudan. Links to relevant code: example/runtime/pytorch.

    You can learn the following things from this tutorial:

    • How to build a Starwhale Runtime.
    • How to use a Starwhale Runtime in different scenarios.
    • How to release a Starwhale Runtime.

    Prerequisites

    Run the following command to clone the example code:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch # for users in the mainland of China, use pytorch-cn-mirror instead.

    Build Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    Use Starwhale Runtime in the standalone instance

    Use Starwhale Runtime in the shell

    # Activate the runtime
    swcli runtime activate pytorch

    swcli runtime activate will download all python dependencies of the runtime, which may take a long time.

    All dependencies are ready in your python environment when the runtime is activated. It is similar to source venv/bin/activate of virtualenv or the conda activate command of conda. If you close the shell or switch to another shell, you need to reactivate the runtime.

    Use Starwhale Runtime in swcli

    # Use the runtime when building a Starwhale Model
    swcli model build . --runtime pytorch
    # Use the runtime when building a Starwhale Dataset
    swcli dataset build --yaml /path/to/dataset.yaml --runtime pytorch
    # Run a model evaluation with the runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch

    Copy Starwhale Runtime to another instance

    You can copy the runtime to a server/cloud instance, which can then be used in the server/cloud instance or downloaded by other users.

    # Copy the runtime to a server instance named 'pre-k8s'
    ❯ swcli runtime copy pytorch cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/getting-started/server/index.html b/getting-started/server/index.html index 19006f821..4e5048c60 100644 --- a/getting-started/server/index.html +++ b/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Getting started with Starwhale Server

    Install Starwhale Server

    To install Starwhale Server, see the installation guide.

    Create your first project

    Login to the server

    Open your browser and enter your server's URL in the address bar. Login with your username(starwhale) and password(abcd1234).

    console-artifacts.gif

    Create a new project

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Copy the dataset, the model, and the runtime to the server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    Use the Web UI to run an evaluation

    Navigate to the "demo" project in your browser and create a new evaluation job.

    console-create-job.gif

    Congratulations! You have completed the Starwhale Server Getting Started Guide.

    - + \ No newline at end of file diff --git a/getting-started/standalone/index.html b/getting-started/standalone/index.html index ccb92861d..cff1e0f39 100644 --- a/getting-started/standalone/index.html +++ b/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Getting started with Starwhale Standalone

    When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.

    We also provide a Jupyter Notebook example, you can try it in Google Colab or in your local vscode/jupyterlab.

    Downloading Examples

    Download Starwhale examples by cloning the Starwhale project via:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    To save time in the example downloading, we skip git-lfs and other commits info. We will use ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory.

    Core Workflow

    Building a Pytorch Runtime

    Runtime example codes are in the example/runtime/pytorch directory.

    • Build the Starwhale runtime bundle:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      tip

      When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. Using the befitting pypi mirror and cache config in the ~/.pip/pip.conf file is a recommended practice.

      For users in the mainland of China, the following conf file is an option:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • Check your local Starwhale Runtime:

      swcli runtime list
      swcli runtime info pytorch

    Building a Model

    Model example codes are in the example/mnist directory.

    • Download the pre-trained model file:

      cd example/mnist
      make download-model
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-model
      cd -
    • Build a Starwhale model:

      swcli model build example/mnist --runtime pytorch
    • Check your local Starwhale models:

      swcli model list
      swcli model info mnist

    Building a Dataset

    Dataset example codes are in the example/mnist directory.

    • Download the MNIST raw data:

      cd example/mnist
      make download-data
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-data
      cd -
    • Build a Starwhale dataset:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • Check your local Starwhale dataset:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    Running an Evaluation Job

    • Create an evaluation job:

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • Check the evaluation result

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    Congratulations! You have completed the Starwhale Standalone Getting Started Guide.

    - + \ No newline at end of file diff --git a/index.html b/index.html index 3ac40f818..636b20fb0 100644 --- a/index.html +++ b/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    What is Starwhale

    Overview

    Starwhale is an MLOps/LLMOps platform that makes your model creation, evaluation, and publication much easier. It aims to create a handy tool for data scientists and machine learning engineers.

    Starwhale helps you:

    • Keep track of your training/testing dataset history including data items and their labels, so that you can easily access them.
    • Manage your model packages that you can share across your team.
    • Run your models in different environments, either on an NVIDIA GPU server or on an embedded device like Raspberry Pi.
    • Create an online service with an interactive Web UI for your models.

    Starwhale is designed to be an open platform. You can create your own plugins to meet your requirements.

    Deployment options

    Each deployment of Starwhale is called an instance. All instances can be managed by the Starwhale Client (swcli).

    You can start using Starwhale with one of the following instance types:

    • Starwhale Standalone - Rather than a running service, Starwhale Standalone is actually a repository that resides in your local file system. It is created and managed by the Starwhale Client (swcli). You only need to install swcli to use it. Currently, each user on a single machine can have only ONE Starwhale Standalone instance. We recommend you use the Starwhale Standalone to build and test your datasets, runtime, and models before pushing them to Starwhale Server/Cloud instances.
    • Starwhale Server - Starwhale Server is a service deployed on your local server. Besides text-only results from the Starwhale Client (swcli), Starwhale Server provides Web UI for you to manage your datasets and models, evaluate your models in your local Kubernetes cluster, and review the evaluation results.
    • Starwhale Cloud - Starwhale Cloud is a managed service hosted on public clouds. By registering an account on https://cloud.starwhale.cn, you are ready to use Starwhale without needing to install, operate, and maintain your own instances. Starwhale Cloud also provides public resources for you to download, like datasets, runtimes, and models. Check the "starwhale/public" project on Starwhale Cloud for more details.

    When choosing which instance type to use, consider the following:

    Instance TypeDeployment locationMaintained byUser InterfaceScalability
    Starwhale StandaloneYour laptop or any server in your data centerNot requiredCommand lineNot scalable
    Starwhale ServerYour data centerYourselfWeb UI and command lineScalable, depends on your Kubernetes cluster
    Starwhale CloudPublic cloud, like AWS or Aliyunthe Starwhale TeamWeb UI and command lineScalable, but currently limited by the freely available resource on the cloud
    - + \ No newline at end of file diff --git a/model/index.html b/model/index.html index fd9c9c7ff..0bd0e9cfe 100644 --- a/model/index.html +++ b/model/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Model

    Overview

    A Starwhale Model is a standard format for packaging machine learning models that can be used for various purposes, like model fine-tuning, model evaluation, and online serving. A Starwhale Model contains the model file, inference codes, configuration files, and any other files required to run the model.

    Create a Starwhale Model

    There are two ways to create a Starwhale Model: by swcli or by Python SDK.

    Create a Starwhale Model by swcli

    To create a Starwhale Model by swcli, you need to define a model.yaml, which describes some required information about the model package, and run the following command:

    swcli model build . --model-yaml /path/to/model.yaml

    For more information about the command and model.yaml, see the swcli reference. model.yaml is optional for model building.

    Create a Starwhale Model by Python SDK

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    Model Management

    Model Management by swcli

    CommandDescription
    swcli model listList all Starwhale Models in a project
    swcli model infoShow detail information about a Starwhale Model
    swcli model copyCopy a Starwhale Model to another location
    swcli model removeRemove a Starwhale Model
    swcli model recoverRecover a previously removed Starwhale Model

    Model Management by WebUI

    Model History

    Starwhale Models are versioned. The general rules about versions are described in Resource versioning in Starwhale.

    Model History Management by swcli

    CommandDescription
    swcli model historyList all versions of a Starwhale Model
    swcli model infoShow detail information about a Starwhale Model version
    swcli model diffCompare two versions of a Starwhale model
    swcli model copyCopy a Starwhale Model version to a new one
    swcli model removeRemove a Starwhale Model version
    swcli model recoverRecover a previously removed Starwhale Model version

    Model Evaluation

    Model Evaluation by swcli

    CommandDescription
    swcli model runCreate an evaluation with a Starwhale Model

    The Storage Format

    The Starwhale Model is a tarball file that contains the source directory.

    - + \ No newline at end of file diff --git a/model/yaml/index.html b/model/yaml/index.html index 0269f2211..e7875c8fb 100644 --- a/model/yaml/index.html +++ b/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    The model.yaml Specification

    tip

    model.yaml is optional for swcli model build.

    When building a Starwhale Model using the swcli model build command, you can specify a yaml file that follows a specific format via the --model-yaml parameter to simplify specifying build parameters.

    Even without specifying the --model-yaml parameter, swcli model build will automatically look for a model.yaml file under the ${workdir} directory and extract parameters from it. Parameters specified on the swcli model build command line take precedence over equivalent configurations in model.yaml, so you can think of model.yaml as a file-based representation of the build command line.

    When building a Starwhale Model using the Python SDK, the model.yaml file does not take effect.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale Model, equivalent to --name parameter.NoString
    run.modulesPython Modules searched during model build, can specify multiple entry points for model execution, format is Python Importable path. Equivalent to --module parameter.YesList[String]
    run.handlerDeprecated alias of run.modules, can only specify one entry point.NoString
    versionmodel.yaml format version, currently only supports "1.0"NoString1.0
    descModel description, equivalent to --desc parameter.NoString

    Example


    name: helloworld

    run:
    modules:
    - src.evaluator

    desc: "example yaml"

    A Starwhale model named helloworld, searches for functions decorated with @evaluation.predict, @evaluation.evaluate or @handler, or classes inheriting from PipelineHandler in src/evaluator.py under ${WORKDIR} of the swcli model build command. These functions or classes will be added to the list of runnable entry points for the Starwhale model. When running the model via swcli model run or Web UI, select the corresponding entry point (handler) to run.

    model.yaml is optional, parameters defined in yaml can also be specified via swcli command line parameters.


    swcli model build . --model-yaml model.yaml

    Is equivalent to:


    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"

    - + \ No newline at end of file diff --git a/next/cloud/billing/bills/index.html b/next/cloud/billing/bills/index.html index faa711bd7..e1baf98d0 100644 --- a/next/cloud/billing/bills/index.html +++ b/next/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/next/cloud/billing/index.html b/next/cloud/billing/index.html index 61b4ec850..3fb431b1a 100644 --- a/next/cloud/billing/index.html +++ b/next/cloud/billing/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/next/cloud/billing/recharge/index.html b/next/cloud/billing/recharge/index.html index e348f75f0..e73ef8f95 100644 --- a/next/cloud/billing/recharge/index.html +++ b/next/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/next/cloud/billing/refund/index.html b/next/cloud/billing/refund/index.html index 4bdd1253c..0df10461f 100644 --- a/next/cloud/billing/refund/index.html +++ b/next/cloud/billing/refund/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/next/cloud/billing/voucher/index.html b/next/cloud/billing/voucher/index.html index c78556c5d..1dfda3cdf 100644 --- a/next/cloud/billing/voucher/index.html +++ b/next/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/next/cloud/index.html b/next/cloud/index.html index c5e7ffc66..a51ddb416 100644 --- a/next/cloud/index.html +++ b/next/cloud/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Cloud User Guide

    Starwhale Cloud is a service hosted on public cloud and operated by the Starwhale team. The access URL is https://cloud.starwhale.cn.

    - + \ No newline at end of file diff --git a/next/community/contribute/index.html b/next/community/contribute/index.html index 14d59fb3e..586ac62d6 100644 --- a/next/community/contribute/index.html +++ b/next/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Contribute to Starwhale

    Getting Involved/Contributing

    We welcome and encourage all contributions to Starwhale, including and not limited to:

    • Describe the problems encountered during use.
    • Submit feature request.
    • Discuss in Slack and Github Issues.
    • Code Review.
    • Improve docs, tutorials and examples.
    • Fix Bug.
    • Add Test Case.
    • Code readability and code comments to improve readability.
    • Develop new features.
    • Write enhancement proposal.

    You can get involved, get updates and contact Starwhale developers in the following ways:

    Starwhale Resources

    Code Structure

    • client: swcli and Python SDK with Pure Python3, which includes all Standalone Instance features.
      • api: Python SDK.
      • cli: Command Line Interface entrypoint.
      • base: Python base abstract.
      • core: Starwhale core concepts which include Dataset, Model, Runtime, Project, Job and Evaluation, etc.
      • utils: Python utilities lib.
    • console: frontend with React + TypeScript.
    • server: Starwhale Controller with Java, which includes all Starwhale Cloud Instance backend APIs.
    • docker: Helm Charts, dockerfile.
    • docs: Starwhale official documentation.
    • example: Example code.
    • scripts: Bash and Python scripts for E2E testing and software releases, etc.

    Fork and clone the repository

    You will need to fork the code of Starwhale repository and clone it to your local machine.

    • Fork Starwhale repository: Fork the Starwhale GitHub repo. For more usage details, please refer to: Fork a repo

    • Install Git-LFS: Git-LFS

       git lfs install
    • Clone code to local machine

      git clone https://github.com/${your username}/starwhale.git

    Development environment for Standalone Instance

    Standalone Instance is written in Python3. When you want to modify swcli and sdk, you need to build the development environment.

    Standalone development environment prerequisites

    • OS: Linux or macOS
    • Python: 3.7~3.11
    • Docker: >=19.03(optional)
    • Python isolated env tools: Python venv, virtualenv or conda, etc.

    Building from source code

    Based on the previous step, clone to the local directory: starwhale, and enter the client subdirectory:

    cd starwhale/client

    Create an isolated python environment with conda:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    Install client package and python dependencies into the starwhale-dev environment:

    make install-sw
    make install-dev-req

    Validate with the swcli --version command. In the development environment, the version is 0.0.0.dev0:

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ which swcli
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    Modifying the code

    When you modify the code, you do not need to install the python package (run the make install-sw command) again. The .editorconfig file will be recognized by most IDEs and code editors, which helps maintain consistent coding styles for multiple developers.

    Lint and Test

    Run unit test, E2E test, mypy lint, flake lint and isort check in the starwhale directory.

    make client-all-check

    Development environment for Cloud Instance

    Cloud Instance is written in Java(backend) and React+TypeScript(frontend).

    Development environment for Console

    Development environment for Server

    • Language: Java
    • Build tool: Maven
    • Development framework: Spring Boot+Mybatis
    • Unit test framework: JUnit 5
      • Mockito used for mocking
      • Hamcrest used for assertion
      • Testcontainers used for providing lightweight, throwaway instances of common databases, Selenium web browsers that can run in a Docker container.
    • Check style tool: use maven-checkstyle-plugin

    Server development environment prerequisites

    • OS: Linux, macOS or Windows
    • Docker: >=19.03
    • JDK: >=11
    • Maven: >=3.8.1
    • Mysql: >=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(If you don't have a k8s cluster, you can use Minikube as an alternative for development and debugging)

    Modify the code and add unit tests

    Now you can enter the corresponding module to modify and adjust the code on the server side. The main business code directory is src/main/java, and the unit test directory is src/test/java.

    Execute code check and run unit tests

    cd starwhale/server
    mvn clean test

    Deploy the server at local machine

    • Dependent services that need to be deployed

      • Minikube (Optional. Minikube can be used when there is no k8s cluster; see the installation doc: Minikube)

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • Package server program

      If you need to deploy the front-end at the same time when deploying the server, you can execute the build command of the front-end part first, and then execute 'mvn clean package', and the compiled front-end files will be automatically packaged.

      Use the following command to package the program

        cd starwhale/server
      mvn clean package
    • Specify the environment required for server startup

      # Minio env
      export SW_STORAGE_ENDPOINT=http://${Minio IP,default is:127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,default is:starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,default is:starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,default is:starwhale}
      export SW_STORAGE_REGION=${Minio region,default is:local}
      # kubernetes env
      export KUBECONFIG=${the '.kube' file path}\.kube\config

      export SW_INSTANCE_URI=http://${Server IP}:8082
      export SW_METADATA_STORAGE_IP=${Mysql IP,default: 127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,default: 3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,default: starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,default: starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,default: starwhale}
    • Deploy server service

      You can use the IDE or the command to deploy.

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • Debug

      There are two ways to debug the modified function:

      • Use swagger-ui for interface debugging, visit /swagger-ui/index.html to find the corresponding api
      • Debug the corresponding function directly in the ui (provided that the front-end code has been built in advance according to the instructions when packaging)
    - + \ No newline at end of file diff --git a/next/concepts/index.html b/next/concepts/index.html index 7de454bcf..4fac7c703 100644 --- a/next/concepts/index.html +++ b/next/concepts/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/next/concepts/names/index.html b/next/concepts/names/index.html index 81120e196..6d5edf941 100644 --- a/next/concepts/names/index.html +++ b/next/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Names in Starwhale

    Names mean project names, model names, dataset names, runtime names, and tag names.

    Names Limitation

    • Names are case-insensitive.
    • A name MUST only consist of letters A-Z a-z, digits 0-9, the hyphen character -, the dot character ., and the underscore character _.
    • A name should always start with a letter or the _ character.
    • The maximum length of a name is 80.

    Names uniqueness requirement

    • The resource name should be a unique string within its owner. For example, the project name should be unique in the owner instance, and the model name should be unique in the owner project.
    • The resource name can not be used by any other resource of the same kind in their owner, including those removed ones. For example, Project "apple" can not have two models named "Alice", even if one of them is already removed.
    • Different kinds of resources can have the same name. For example, a project and a model can be called "Alice" simultaneously.
    • Resources with different owners can have the same name. For example, a model in project "Apple" and a model in project "Banana" can have the same name "Alice".
    • Garbage-collected resources' names can be reused. For example, after the model with the name "Alice" in project "Apple" is removed and garbage collected, the project can have a new model with the same name "Alice".
    - + \ No newline at end of file diff --git a/next/concepts/project/index.html b/next/concepts/project/index.html index dd2633038..81c865eb8 100644 --- a/next/concepts/project/index.html +++ b/next/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Project in Starwhale

    "Project" is the basic unit for organizing different resources like models, datasets, etc. You may use projects for different purposes. For example, you can create a project for a data scientist team, a product line, or a specific model. Users usually work on one or more projects in their daily lives.

    Starwhale Server/Cloud projects are grouped by accounts. Starwhale Standalone does not have accounts. So you will not see any account name prefix in Starwhale Standalone projects. Starwhale Server/Cloud projects can be either "public" or "private". A public project means that all users on the same instance are assigned a "guest" role to the project by default. For more information about roles, see Roles and permissions in Starwhale.

    A self project is created automatically and configured as the default project in Starwhale Standalone.

    - + \ No newline at end of file diff --git a/next/concepts/roles-permissions/index.html b/next/concepts/roles-permissions/index.html index 2a2e30e37..663ae6f2c 100644 --- a/next/concepts/roles-permissions/index.html +++ b/next/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Roles and permissions in Starwhale

    Roles are used to assign permissions to users. Only Starwhale Server/Cloud has roles and permissions; Starwhale Standalone does not. The Administrator role is automatically created and assigned to the user "admin". Some sensitive operations can only be performed by users with the Administrator role, for example, creating accounts in Starwhale Server.

    Projects have three roles:

    • Admin - Project administrators can read and write project data and assign project roles to users.
    • Maintainer - Project maintainers can read and write project data.
    • Guest - Project guests can only read project data.
    ActionAdminMaintainerGuest
    Manage project membersYes
    Edit projectYesYes
    View projectYesYesYes
    Create evaluationsYesYes
    Remove evaluationsYesYes
    View evaluationsYesYesYes
    Create datasetsYesYes
    Update datasetsYesYes
    Remove datasetsYesYes
    View datasetsYesYesYes
    Create modelsYesYes
    Update modelsYesYes
    Remove modelsYesYes
    View modelsYesYesYes
    Create runtimesYesYes
    Update runtimesYesYes
    Remove runtimesYesYes
    View runtimesYesYesYes

    The user who creates a project becomes the first project administrator. They can assign roles to other users later.

    - + \ No newline at end of file diff --git a/next/concepts/versioning/index.html b/next/concepts/versioning/index.html index 2f61124fc..1f696f15d 100644 --- a/next/concepts/versioning/index.html +++ b/next/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Resource versioning in Starwhale

    • Starwhale manages the history of all models, datasets, and runtimes. Every update to a specific resource appends a new version of the history.
    • Versions are identified by a version id which is a random string generated automatically by Starwhale and are ordered by their creation time.
    • Versions can have tags. Starwhale uses version tags to provide a human-friendly representation of versions. By default, Starwhale attaches a default tag to each version. The default tag is the letter "v", followed by a number. For each versioned resource, the first version tag is always tagged with "v0", the second version is tagged with "v1", and so on. And there is a special tag "latest" that always points to the last version. When a version is removed, its default tag will not be reused. For example, there is a model with tags "v0, v1, v2". When "v2" is removed, tags will be "v0, v1". And the following tag will be "v3" instead of "v2" again. You can attach your own tags to any version and remove them at any time.
    • Starwhale uses a linear history model. There is neither branch nor cycle in history.
    • History can not be rolled back. When a version is to be reverted, Starwhale clones the version and appends it as a new version to the end of the history. Versions in history can be manually removed and recovered.
    - + \ No newline at end of file diff --git a/next/dataset/index.html b/next/dataset/index.html index c0a62bd03..0346f8922 100644 --- a/next/dataset/index.html +++ b/next/dataset/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Dataset User Guide

    Overview

    Design Overview

    Starwhale Dataset Positioning

    The Starwhale Dataset contains three core stages: data construction, data loading, and data visualization. It is a data management tool for the ML/DL field. Starwhale Dataset can directly use the environment built by Starwhale Runtime, and can be seamlessly integrated with Starwhale Model and Starwhale Evaluation. It is an important part of the Starwhale MLOps toolchain.

    According to the classification of MLOps Roles in Machine Learning Operations (MLOps): Overview, Definition, and Architecture, the three stages of Starwhale Dataset target the following user groups:

    • Data construction: Data Engineer, Data Scientist
    • Data loading: Data Scientist, ML Developer
    • Data visualization: Data Engineer, Data Scientist, ML Developer

    mlops-users

    Core Functions

    • Efficient loading: The original dataset files are stored in external storage such as OSS or NAS, and are loaded on demand without having to save to disk.
    • Simple construction: Supports one-click dataset construction from Image/Video/Audio directories, json files and Huggingface datasets, and also supports writing Python code to build completely custom datasets.
    • Versioning: Can perform version tracking, data append and other operations, and avoid duplicate data storage through the internally abstracted ObjectStore.
    • Sharing: Implement bidirectional dataset sharing between Standalone instances and Cloud/Server instances through the swcli dataset copy command.
    • Visualization: The web interface of Cloud/Server instances can present multi-dimensional, multi-type data visualization of datasets.
    • Artifact storage: The Standalone instance can store locally built or distributed swds series files, while the Cloud/Server instance uses object storage to provide centralized swds artifact storage.
    • Seamless Starwhale integration: Starwhale Dataset can use the runtime environment built by Starwhale Runtime to build datasets. Starwhale Evaluation and Starwhale Model can directly specify the dataset through the --dataset parameter to complete automatic data loading, which facilitates inference, model evaluation and other environments.

    Key Elements

    • swds virtual package file: swds is different from swmp and swrt. It is not a single packaged file, but a virtual concept that specifically refers to a directory that contains dataset-related files for a version of the Starwhale dataset, including _manifest.yaml, dataset.yaml, dataset build Python scripts, and data file links, etc. You can use the swcli dataset info command to view where the swds is located. swds is the abbreviation of Starwhale Dataset.

    swds-tree.png

    • swcli dataset command line: A set of dataset-related commands, including construction, distribution and management functions. See CLI Reference for details.
    • dataset.yaml configuration file: Describes the dataset construction process. It can be completely omitted and specified through swcli dataset build parameters. dataset.yaml can be considered as a configuration file representation of the swcli dataset build command line parameters. swcli dataset build parameters take precedence over dataset.yaml.
    • Dataset Python SDK: Includes data construction, data loading, and several predefined data types. See Python SDK for details.
    • Python scripts for dataset construction: A series of scripts written using the Starwhale Python SDK to build datasets.

    Best Practices

    The construction of Starwhale Dataset is performed independently. If third-party libraries need to be introduced when writing construction scripts, using Starwhale Runtime can simplify Python dependency management and ensure reproducible dataset construction. The Starwhale platform will build in as many open source datasets as possible for users to copy datasets for immediate use.

    Command Line Grouping

    The Starwhale Dataset command line can be divided into the following stages from the perspective of usage phases:

    • Construction phase
      • swcli dataset build
    • Visualization phase
      • swcli dataset diff
      • swcli dataset head
    • Distribution phase
      • swcli dataset copy
    • Basic management
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    Currently, the Web UI in the Cloud/Server instance can visually display the dataset. Only DataTypes using the Python SDK can be correctly interpreted by the frontend, with mappings as follows:

    • Image: Display thumbnails, enlarged images, MASK type images, support image/png, image/jpeg, image/webp, image/svg+xml, image/gif, image/apng, image/avif formats.
    • Audio: Displayed as an audio wave graph, playable, supports audio/mp3 and audio/wav formats.
    • Video: Displayed as a video, playable, supports video/mp4, video/avi and video/webm formats.
    • GrayscaleImage: Display grayscale images, support x/grayscale format.
    • Text: Display text, support text/plain format, set encoding format, default is utf-8.
    • Binary and Bytes: Not supported for display currently.
    • Link: The above multimedia types all support specifying links as storage paths.

    Starwhale Dataset Data Format

    The dataset consists of multiple rows, each row being a sample, each sample containing several features. The features have a dict-like structure with some simple restrictions [L]:

    • The dict keys must be str type.
    • The dict values must be Python basic types like int/float/bool/str/bytes/dict/list/tuple, or Starwhale built-in data types.
    • For the same key across different samples, the value types do not need to stay the same.
    • If the value is a list or tuple, the element data types must be consistent.
    • For dict values, the restrictions are the same as [L].

    Example:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    File Data Handling

    Starwhale Dataset handles file type data in a special way. You can ignore this section if you don't care about Starwhale's implementation.

    According to actual usage scenarios, Starwhale Dataset has two ways of handling file data, both based on the base class starwhale.BaseArtifact:

    • swds-bin: Starwhale merges the data into several large files in its own binary format (swds-bin), which can efficiently perform indexing, slicing and loading.
    • remote-link: If the user's original data is stored in some external storage such as OSS or NAS, with a lot of original data that is inconvenient to move or has already been encapsulated by some internal dataset implementation, then you only need to use links in the data to establish indexes.

    In the same Starwhale dataset, two types of data can be included simultaneously.

    - + \ No newline at end of file diff --git a/next/dataset/yaml/index.html b/next/dataset/yaml/index.html index 43201cdd2..ecb9e8df0 100644 --- a/next/dataset/yaml/index.html +++ b/next/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    The dataset.yaml Specification

    tip

    dataset.yaml is optional for the swcli dataset build command.

    Building Starwhale Dataset uses dataset.yaml. Omitting dataset.yaml allows describing related configurations in swcli dataset build command line parameters. dataset.yaml can be considered as a file-based representation of the build command line configuration.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale DatasetYesString
    handlerImportable address of a class that inherits starwhale.SWDSBinBuildExecutor, starwhale.UserRawBuildExecutor or starwhale.BuildExecutor, or a function that returns a Generator or iterable object. Format is {module path}:{class name\|function name}YesString
    descDataset descriptionNoString""
    versiondataset.yaml format version, currently only "1.0" is supportedNoString1.0
    attrDataset build parametersNoDict
    attr.volume_sizeSize of each data file in the swds-bin dataset. Can be a number in bytes, or a number plus unit like 64M, 1GB etc.NoInt or Str64MB
    attr.alignment_sizeData alignment size of each data block in the swds-bin dataset. If set to 4k, and a data block is 7.9K, 0.1K padding will be added to make the block size a multiple of alignment_size, improving page size and read efficiency.NoInteger or String128

    Examples

    Simplest Example

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    The helloworld dataset uses the ExampleProcessExecutor class in dataset.py of the dataset.yaml directory to build data.

    MNIST Dataset Build Example

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor
    desc: MNIST data and label test dataset
    attr:
    alignment_size: 128
    volume_size: 4M

    Example with handler as a generator function

    dataset.yaml contents:

    name: helloworld
    handler: dataset:iter_item

    dataset.py contents:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}
    - + \ No newline at end of file diff --git a/next/evaluation/heterogeneous/node-able/index.html b/next/evaluation/heterogeneous/node-able/index.html index c2e89ee6c..f8af7e073 100644 --- a/next/evaluation/heterogeneous/node-able/index.html +++ b/next/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -23,7 +23,7 @@ Refer to the link.

    Take v0.13.0-rc.1 as an example:

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    Note: This operation will run the NVIDIA device plugin on all Kubernetes nodes. If configured before, it will be updated. Please evaluate the image version used carefully.

  • Confirm GPU can be discovered and used in the cluster. Refer to the command below. Check that nvidia.com/gpu is in the Capacity of the Jetson node. The GPU is then recognized normally by the Kubernetes cluster.

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • Build and Use Custom Images

    The l4t-jetpack image mentioned earlier can meet our general use. If we need to customize a more streamlined image or one with more features, we can make it based on l4t-base. Relevant Dockerfiles can refer to the image Starwhale made for mnist.

    - + \ No newline at end of file diff --git a/next/evaluation/heterogeneous/virtual-node/index.html b/next/evaluation/heterogeneous/virtual-node/index.html index c101e647b..07cc25d16 100644 --- a/next/evaluation/heterogeneous/virtual-node/index.html +++ b/next/evaluation/heterogeneous/virtual-node/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Virtual Kubelet as Kubernetes nodes

    Introduction

    Virtual Kubelet is an open source framework that can simulate a K8s node by mimicking the communication between kubelet and the K8s cluster.

    This solution is widely used by major cloud vendors for serverless container cluster solutions, such as Alibaba Cloud's ASK, Amazon's AWS Fargate, etc.

    Principles

    The virtual kubelet framework implements the related interfaces of kubelet for Node. With simple configuration, it can simulate a node.

    We only need to implement the PodLifecycleHandler interface to support:

    • Create, update, delete Pod
    • Get Pod status
    • Get Container logs

    Adding Devices to the Cluster

    If our device cannot serve as a K8s node due to resource constraints or other situations, we can manage these devices by using virtual kubelet to simulate a proxy node.

    The control flow between Starwhale Controller and the device is as follows:


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    Virtual kubelet converts the Pod orchestration information sent by Starwhale Controller into control behaviors for the device, such as executing a command via ssh on the device, or sending a message via USB or serial port.

    Below is an example of using virtual kubelet to control a device not joined to the cluster that is SSH-enabled:

    1. Prepare certificates
    • Create file vklet.csr with the following content:
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name

    [req_distinguished_name]

    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names

    [alt_names]
    IP = 1.2.3.4
    • Generate the certificate:
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • Submit the certificate:
    cat vklet.csr| base64 | tr -d "\n" # output as content of spec.request in csr.yaml

    csr.yaml:

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
    kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    Now we have vklet-cert.pem.

    • Compile virtual kubelet:
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    Create the node configuration file mock.json:

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    Start virtual kubelet:

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig
    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    Now we have simulated a node with 100 cores + 100GB memory using virtual kubelet.

    • Add PodLifecycleHandler implementation to convert important information in Pod orchestration into ssh command execution, and collect logs for Starwhale Controller to collect.

    See ssh executor for a concrete implementation.

    - + \ No newline at end of file diff --git a/next/evaluation/index.html b/next/evaluation/index.html index 276ecd33e..16d22a0c7 100644 --- a/next/evaluation/index.html +++ b/next/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Model Evaluation

    Design Overview

    Starwhale Evaluation Positioning

    The goal of Starwhale Evaluation is to provide end-to-end management for model evaluation, including creating Jobs, distributing Tasks, viewing model evaluation reports and basic management. Starwhale Evaluation is a specific application of Starwhale Model, Starwhale Dataset, and Starwhale Runtime in the model evaluation scenario. Starwhale Evaluation is part of the MLOps toolchain built by Starwhale. More applications like Starwhale Model Serving, Starwhale Training will be included in the future.

    Core Features

    • Visualization: Both swcli and the Web UI provide visualization of model evaluation results, supporting comparison of multiple results. Users can also customize logging of intermediate processes.

    • Multi-scenario Adaptation: Whether it's a notebook, desktop or distributed cluster environment, the same commands, Python scripts, artifacts and operations can be used for model evaluation. This satisfies different computational power and data volume requirements.

    • Seamless Starwhale Integration: Leverage Starwhale Runtime for the runtime environment, Starwhale Dataset as data input, and run models from Starwhale Model. Configuration is simple whether using swcli, Python SDK or Cloud/Server instance Web UI.

    Key Elements

    • swcli model run: Command line for bulk offline model evaluation.
    • swcli model serve: Command line for online model evaluation.

    Best Practices

    Command Line Grouping

    From the perspective of completing an end-to-end Starwhale Evaluation workflow, commands can be grouped as:

    • Preparation Stage
      • swcli dataset build or Starwhale Dataset Python SDK
      • swcli model build or Starwhale Model Python SDK
      • swcli runtime build
    • Evaluation Stage
      • swcli model run
      • swcli model serve
    • Results Stage
      • swcli job info
    • Basic Management
      • swcli job list
      • swcli job remove
      • swcli job recover

    Abstraction job-step-task

    • job: A model evaluation task is a job, which contains one or more steps.

    • step: A step corresponds to a stage in the evaluation process. With the default PipelineHandler, steps are predict and evaluate. For custom evaluation processes using @handler, @evaluation.predict, @evaluation.evaluate decorators, steps are the decorated functions. Steps can have dependencies, forming a DAG. A step contains one or more tasks. Tasks in the same step have the same logic but different inputs. A common approach is to split the dataset into multiple parts, with each part passed to a task. Tasks can run in parallel.

    • task: A task is the final running entity. In Cloud/Server instances, a task is a container in a Pod. In Standalone instances, a task is a Python Thread.

    The job-step-task abstraction is the basis for implementing distributed runs in Starwhale Evaluation.

    - + \ No newline at end of file diff --git a/next/faq/index.html b/next/faq/index.html index 6581732f6..142c01734 100644 --- a/next/faq/index.html +++ b/next/faq/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    FAQs

    Error "413 Client Error: Request Entity Too Large" when Copying Starwhale Models to Server

    • Cause: The proxy-body-size set in the Ingress (Nginx default is 1MB) is smaller than the actual uploaded file size.
    • Solution: Check the Ingress configuration of the Starwhale Server and add nginx.ingress.kubernetes.io/proxy-body-size: 30g to the annotations field.

    RBAC Authorization Error when Starwhale Server Submits Jobs to Kubernetes Cluster

    The Kubernetes cluster has RBAC enabled, and the service account for the Starwhale Server does not have sufficient permissions. It requires at least the following permissions:

    ResourceAPI GroupGetListWatchCreateDelete
    jobsbatchYYYYY
    podscoreYYY
    nodescoreYYY
    events""Y

    Example YAML:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale
    - + \ No newline at end of file diff --git a/next/getting-started/cloud/index.html b/next/getting-started/cloud/index.html index 8de584745..bcd80765d 100644 --- a/next/getting-started/cloud/index.html +++ b/next/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    -
    Version: WIP

    Getting started with Starwhale Cloud

    Starwhale Cloud is hosted on Aliyun with the domain name https://cloud.starwhale.cn. In the future, we will launch the service on AWS with the domain name https://cloud.starwhale.ai. It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.

    You need to install the Starwhale Client (swcli) at first.

    Sign Up for Starwhale Cloud and create your first project

    You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account.

    Then you can create a new project. In this tutorial, we will use the name demo for the project name.

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Login to the cloud instance

    swcli instance login --username <your account name> --password <your password> --alias swcloud https://cloud.starwhale.cn

    Copy the dataset, model, and runtime to the cloud instance

    swcli model copy mnist swcloud/project/<your account name>:demo
    swcli dataset copy mnist swcloud/project/<your account name>:demo
    swcli runtime copy pytorch swcloud/project/<your account name>:demo

    Run an evaluation with the web UI

    console-create-job.gif

    Congratulations! You have completed the Starwhale Cloud Getting Started Guide.

    - +
    Version: WIP

    Getting started with Starwhale Cloud

    Starwhale Cloud is hosted on Aliyun with the domain name https://cloud.starwhale.cn. In the future, we will launch the service on AWS with the domain name https://cloud.starwhale.ai. It's important to note that these are two separate instances that are not interconnected, and accounts and data are not shared. You can choose either one to get started.

    You need to install the Starwhale Client (swcli) at first.

    Sign Up for Starwhale Cloud and create your first project

    You can either directly log in with your GitHub or Weixin account or sign up for an account. You will be asked for an account name if you log in with your GitHub or Weixin account.

    Then you can create a new project. In this tutorial, we will use the name demo for the project name.

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named helloworld
    • a Starwhale dataset named mnist64
    • a Starwhale runtime named helloworld

    Login to the cloud instance

    swcli instance login --username <your account name> --password <your password> --alias swcloud https://cloud.starwhale.cn

    Copy the dataset, model, and runtime to the cloud instance

    swcli model copy helloworld swcloud/project/<your account name>:demo
    swcli dataset copy mnist64 swcloud/project/<your account name>:demo
    swcli runtime copy helloworld swcloud/project/<your account name>:demo

    Run an evaluation with the web UI

    console-create-job.gif

    Congratulations! You have completed the Starwhale Cloud Getting Started Guide.

    + \ No newline at end of file diff --git a/next/getting-started/index.html b/next/getting-started/index.html index 61a6a57ee..062e4d402 100644 --- a/next/getting-started/index.html +++ b/next/getting-started/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Getting started

    First, you need to install the Starwhale Client (swcli), which can be done by running the following command:

    python3 -m pip install starwhale

    For more information, see the swcli installation guide.

    Depending on your instance type, there are three getting-started guides available for you:

    • Getting started with Starwhale Standalone - This guide helps you run an MNIST evaluation on your desktop PC/laptop. It is the fastest and simplest way to get started with Starwhale.
    • Getting started with Starwhale Server - This guide helps you install Starwhale Server in your private data center and run an MNIST evaluation. At the end of the tutorial, you will have a Starwhale Server instance where you can run model evaluations on and manage your datasets and models.
    • Getting started with Starwhale Cloud - This guide helps you create an account on Starwhale Cloud and run an MNIST evaluation. It is the easiest way to experience all Starwhale features.
    - + \ No newline at end of file diff --git a/next/getting-started/runtime/index.html b/next/getting-started/runtime/index.html index 1d8b329cc..252e66446 100644 --- a/next/getting-started/runtime/index.html +++ b/next/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Getting Started with Starwhale Runtime

    This article demonstrates how to build a Starwhale Runtime of the Pytorch environment and how to use it. This runtime can meet the dependency requirements of the six examples in Starwhale: mnist, speech commands, nmt, cifar10, ag_news, and PennFudan. Links to relevant code: example/runtime/pytorch.

    You can learn the following things from this tutorial:

    • How to build a Starwhale Runtime.
    • How to use a Starwhale Runtime in different scenarios.
    • How to release a Starwhale Runtime.

    Prerequisites

    Run the following command to clone the example code:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch # for users in the mainland of China, use pytorch-cn-mirror instead.

    Build Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    Use Starwhale Runtime in the standalone instance

    Use Starwhale Runtime in the shell

    # Activate the runtime
    swcli runtime activate pytorch

    swcli runtime activate will download all python dependencies of the runtime, which may take a long time.

    All dependencies are ready in your python environment when the runtime is activated. It is similar to source venv/bin/activate of virtualenv or the conda activate command of conda. If you close the shell or switch to another shell, you need to reactivate the runtime.

    Use Starwhale Runtime in swcli

    # Use the runtime when building a Starwhale Model
    swcli model build . --runtime pytorch
    # Use the runtime when building a Starwhale Dataset
    swcli dataset build --yaml /path/to/dataset.yaml --runtime pytorch
    # Run a model evaluation with the runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch

    Copy Starwhale Runtime to another instance

    You can copy the runtime to a server/cloud instance, which can then be used in the server/cloud instance or downloaded by other users.

    # Copy the runtime to a server instance named 'pre-k8s'
    ❯ swcli runtime copy pytorch cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/next/getting-started/server/index.html b/next/getting-started/server/index.html index 6d53061b2..63f9d7f4c 100644 --- a/next/getting-started/server/index.html +++ b/next/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    -
    Version: WIP

    Getting started with Starwhale Server

    Install Starwhale Server

    To install Starwhale Server, see the installation guide.

    Create your first project

    Login to the server

    Open your browser and enter your server's URL in the address bar. Login with your username(starwhale) and password(abcd1234).

    console-artifacts.gif

    Create a new project

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named mnist
    • a Starwhale dataset named mnist
    • a Starwhale runtime named pytorch

    Copy the dataset, the model, and the runtime to the server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    Use the Web UI to run an evaluation

    Navigate to the "demo" project in your browser and create a new evaluation job.

    console-create-job.gif

    Congratulations! You have completed the Starwhale Server Getting Started Guide.

    - +
    Version: WIP

    Getting started with Starwhale Server

    Install Starwhale Server

    To install Starwhale Server, see the installation guide.

    Create your first project

    Login to the server

    Open your browser and enter your server's URL in the address bar. Login with your username(starwhale) and password(abcd1234).

    console-artifacts.gif

    Create a new project

    Build the dataset, model, and runtime on your local machine

    Follow step 1 to step 4 in Getting started with Starwhale Standalone to create:

    • a Starwhale model named helloworld
    • a Starwhale dataset named mnist64
    • a Starwhale runtime named helloworld

    Copy the dataset, the model, and the runtime to the server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy helloworld server/project/demo
    swcli dataset copy mnist64 server/project/demo
    swcli runtime copy helloworld server/project/demo

    Use the Web UI to run an evaluation

    Navigate to the "demo" project in your browser and create a new evaluation job.

    console-create-job.gif

    Congratulations! You have completed the Starwhale Server Getting Started Guide.

    + \ No newline at end of file diff --git a/next/getting-started/standalone/index.html b/next/getting-started/standalone/index.html index 89ff3b83f..ada71eba1 100644 --- a/next/getting-started/standalone/index.html +++ b/next/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    -
    Version: WIP

    Getting started with Starwhale Standalone

    When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.

    We also provide a Jupyter Notebook example, you can try it in Google Colab or in your local vscode/jupyterlab.

    Downloading Examples

    Download Starwhale examples by cloning the Starwhale project via:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    To save time in the example downloading, we skip git-lfs and other commits info. We will use ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory.

    Core Workflow

    Building a Pytorch Runtime

    Runtime example codes are in the example/runtime/pytorch directory.

    • Build the Starwhale runtime bundle:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      tip

      When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. Using the befitting pypi mirror and cache config in the ~/.pip/pip.conf file is a recommended practice.

      For users in the mainland of China, the following conf file is an option:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • Check your local Starwhale Runtime:

      swcli runtime list
      swcli runtime info pytorch

    Building a Model

    Model example codes are in the example/mnist directory.

    • Download the pre-trained model file:

      cd example/mnist
      make download-model
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-model
      cd -
    • Build a Starwhale model:

      swcli model build example/mnist --runtime pytorch
    • Check your local Starwhale models:

      swcli model list
      swcli model info mnist

    Building a Dataset

    Dataset example codes are in the example/mnist directory.

    • Download the MNIST raw data:

      cd example/mnist
      make download-data
      # For users in the mainland of China, please add `CN=1` environment for make command:
      # CN=1 make download-data
      cd -
    • Build a Starwhale dataset:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • Check your local Starwhale dataset:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    Running an Evaluation Job

    • Create an evaluation job:

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • Check the evaluation result

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    Congratulations! You have completed the Starwhale Standalone Getting Started Guide.

    - +
    Version: WIP

    Getting started with Starwhale Standalone

    When the Starwhale Client (swcli) is installed, you are ready to use Starwhale Standalone.

    We also provide a Jupyter Notebook example, you can try it in Google Colab or in your local vscode/jupyterlab.

    Downloading Examples

    Download Starwhale examples by cloning the Starwhale project via:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    To save time in the example downloading, we skip git-lfs and other commits info. We will use ML/DL HelloWorld code MNIST to start your Starwhale journey. The following steps are all performed in the starwhale directory.

    Core Workflow

    Building Starwhale Runtime

    Runtime example codes are in the example/helloworld directory.

    • Build the Starwhale runtime bundle:

      swcli -vvv runtime build --yaml example/helloworld/runtime.yaml
      tip

      When you first build runtime, creating an isolated python environment and downloading python dependencies will take a lot of time. The command execution time is related to the network environment of the machine and the number of packages in the runtime.yaml. Using the befitting pypi mirror and cache config in the ~/.pip/pip.conf file is a recommended practice.

      For users in the mainland of China, the following conf file is an option:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • Check your local Starwhale Runtime:

      swcli runtime list
      swcli runtime info helloworld

    Building a Model

    Model example codes are in the example/helloworld directory.

    • Build a Starwhale model:

      swcli -vvv model build example/helloworld --name helloworld -m evaluation --runtime helloworld
    • Check your local Starwhale models:

      swcli model list
      swcli model info helloworld

    Building a Dataset

    Dataset example codes are in the example/helloworld directory.

    • Build a Starwhale dataset:

      swcli runtime activate helloworld
      python3 example/helloworld/dataset.py
      deactivate
    • Check your local Starwhale dataset:

      swcli dataset list
      swcli dataset info mnist64
      swcli dataset head mnist64

    Running an Evaluation Job

    • Create an evaluation job:

      swcli -vvv model run --uri helloworld --dataset mnist64 --runtime helloworld
    • Check the evaluation result

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    Congratulations! You have completed the Starwhale Standalone Getting Started Guide.

    + \ No newline at end of file diff --git a/next/index.html b/next/index.html index bcbd464a4..5dfb3d14f 100644 --- a/next/index.html +++ b/next/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    What is Starwhale

    Overview

    Starwhale is an MLOps/LLMOps platform that makes your model creation, evaluation, and publication much easier. It aims to create a handy tool for data scientists and machine learning engineers.

    Starwhale helps you:

    • Keep track of your training/testing dataset history including data items and their labels, so that you can easily access them.
    • Manage your model packages that you can share across your team.
    • Run your models in different environments, either on an Nvidia GPU server or on an embedded device like a Raspberry Pi.
    • Create an online service with an interactive Web UI for your models.

    Starwhale is designed to be an open platform. You can create your own plugins to meet your requirements.

    Deployment options

    Each deployment of Starwhale is called an instance. All instances can be managed by the Starwhale Client (swcli).

    You can start using Starwhale with one of the following instance types:

    • Starwhale Standalone - Rather than a running service, Starwhale Standalone is actually a repository that resides in your local file system. It is created and managed by the Starwhale Client (swcli). You only need to install swcli to use it. Currently, each user on a single machine can have only ONE Starwhale Standalone instance. We recommend you use the Starwhale Standalone to build and test your datasets, runtime, and models before pushing them to Starwhale Server/Cloud instances.
    • Starwhale Server - Starwhale Server is a service deployed on your local server. Besides text-only results from the Starwhale Client (swcli), Starwhale Server provides Web UI for you to manage your datasets and models, evaluate your models in your local Kubernetes cluster, and review the evaluation results.
    • Starwhale Cloud - Starwhale Cloud is a managed service hosted on public clouds. By registering an account on https://cloud.starwhale.cn, you are ready to use Starwhale without needing to install, operate, and maintain your own instances. Starwhale Cloud also provides public resources for you to download, like datasets, runtimes, and models. Check the "starwhale/public" project on Starwhale Cloud for more details.

    When choosing which instance type to use, consider the following:

    Instance TypeDeployment locationMaintained byUser InterfaceScalability
    Starwhale StandaloneYour laptop or any server in your data centerNot requiredCommand lineNot scalable
    Starwhale ServerYour data centerYourselfWeb UI and command lineScalable, depends on your Kubernetes cluster
    Starwhale CloudPublic cloud, like AWS or Aliyunthe Starwhale TeamWeb UI and command lineScalable, but currently limited by the freely available resource on the cloud
    - + \ No newline at end of file diff --git a/next/model/index.html b/next/model/index.html index fc1669964..57b9bfad3 100644 --- a/next/model/index.html +++ b/next/model/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Model

    Overview

    A Starwhale Model is a standard format for packaging machine learning models that can be used for various purposes, like model fine-tuning, model evaluation, and online serving. A Starwhale Model contains the model file, inference codes, configuration files, and any other files required to run the model.

    Create a Starwhale Model

    There are two ways to create a Starwhale Model: by swcli or by Python SDK.

    Create a Starwhale Model by swcli

    To create a Starwhale Model by swcli, you need to define a model.yaml, which describes some required information about the model package, and run the following command:

    swcli model build . --model-yaml /path/to/model.yaml

    For more information about the command and model.yaml, see the swcli reference. model.yaml is optional for model building.

    Create a Starwhale Model by Python SDK

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    Model Management

    Model Management by swcli

    CommandDescription
    swcli model listList all Starwhale Models in a project
    swcli model infoShow detail information about a Starwhale Model
    swcli model copyCopy a Starwhale Model to another location
    swcli model removeRemove a Starwhale Model
    swcli model recoverRecover a previously removed Starwhale Model

    Model Management by WebUI

    Model History

    Starwhale Models are versioned. The general rules about versions are described in Resource versioning in Starwhale.

    Model History Management by swcli

    CommandDescription
    swcli model historyList all versions of a Starwhale Model
    swcli model infoShow detail information about a Starwhale Model version
    swcli model diffCompare two versions of a Starwhale model
    swcli model copyCopy a Starwhale Model version to a new one
    swcli model removeRemove a Starwhale Model version
    swcli model recoverRecover a previously removed Starwhale Model version

    Model Evaluation

    Model Evaluation by swcli

    CommandDescription
    swcli model runCreate an evaluation with a Starwhale Model

    The Storage Format

    The Starwhale Model is a tarball file that contains the source directory.

    - + \ No newline at end of file diff --git a/next/model/yaml/index.html b/next/model/yaml/index.html index 5c2f3f16f..160cb715d 100644 --- a/next/model/yaml/index.html +++ b/next/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    The model.yaml Specification

    tip

    model.yaml is optional for swcli model build.

    When building a Starwhale Model using the swcli model build command, you can specify a yaml file that follows a specific format via the --model-yaml parameter to simplify specifying build parameters.

    Even without specifying the --model-yaml parameter, swcli model build will automatically look for a model.yaml file under the ${workdir} directory and extract parameters from it. Parameters specified on the swcli model build command line take precedence over equivalent configurations in model.yaml, so you can think of model.yaml as a file-based representation of the build command line.

    When building a Starwhale Model using the Python SDK, the model.yaml file does not take effect.

    YAML Field Descriptions

    FieldDescriptionRequiredTypeDefault
    nameName of the Starwhale Model, equivalent to --name parameter.NoString
    run.modulesPython Modules searched during model build, can specify multiple entry points for model execution, format is Python Importable path. Equivalent to --module parameter.YesList[String]
    run.handlerDeprecated alias of run.modules, can only specify one entry point.NoString
    versionmodel.yaml format version, currently only supports "1.0"NoString1.0
    descModel description, equivalent to --desc parameter.NoString

    Example


    name: helloworld

    run:
    modules:
    - src.evaluator

    desc: "example yaml"

    A Starwhale model named helloworld, searches for functions decorated with @evaluation.predict, @evaluation.evaluate or @handler, or classes inheriting from PipelineHandler in src/evaluator.py under ${WORKDIR} of the swcli model build command. These functions or classes will be added to the list of runnable entry points for the Starwhale model. When running the model via swcli model run or Web UI, select the corresponding entry point (handler) to run.

    model.yaml is optional, parameters defined in yaml can also be specified via swcli command line parameters.


    swcli model build . --model-yaml model.yaml

    Is equivalent to:


    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"

    - + \ No newline at end of file diff --git a/next/reference/sdk/dataset/index.html b/next/reference/sdk/dataset/index.html index 1d28bf2ed..1243919ee 100644 --- a/next/reference/sdk/dataset/index.html +++ b/next/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Dataset SDK

    dataset

    Get starwhale.Dataset object, by creating new datasets or loading existing datasets.

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    Parameters

    • uri: (str or Resource, required)
      • The dataset uri or Resource object.
    • create: (str, optional)
      • The mode of dataset creating. The options are auto, empty and forbid.
        • auto mode: If the dataset already exists, creation is ignored. If it does not exist, the dataset is created automatically.
        • empty mode: If the dataset already exists, an Exception is raised; If it does not exist, an empty dataset is created. This mode ensures the creation of a new, empty dataset.
        • forbid mode: If the dataset already exists, nothing is done. If it does not exist, an Exception is raised. This mode ensures the existence of the dataset.
      • The default is auto.
    • readonly: (bool, optional)
      • For an existing dataset, you can specify the readonly=True argument to ensure the dataset is in readonly mode.
      • Default is False.

    Examples

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # return False, "mnist" dataset is not existing.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset implements the abstraction of a Starwhale dataset, and can operate on datasets in Standalone/Server/Cloud instances.

    from_huggingface

    from_huggingface is a classmethod that can convert a Huggingface dataset into a Starwhale dataset.

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • dataset name.
    • repo: (str, required)
    • subset: (str, optional)
      • The subset name. If the huggingface dataset has multiple subsets, you must specify the subset name.
    • split: (str, optional)
      • The split name. If the split name is not specified, all splits of the dataset will be built.
    • revision: (str, optional)
      • The huggingface datasets revision. The default value is main.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • cache: (bool, optional)
      • Whether to use huggingface dataset cache(download + local hf dataset).
      • The default value is True.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json is a classmethod that can convert a json text into a Starwhale dataset.

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • Dataset name.
    • json_text: (str, required)
      • A json string. The from_json function deserializes this string into Python objects to start building the Starwhale dataset.
    • field_selector: (str, optional)
      • The field from which you would like to extract dataset array items.
      • The default value is "", which indicates that the json object is an array containing all the items.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder is a classmethod that can read Image/Video/Audio data from a specified directory and automatically convert them into a Starwhale dataset. This function supports the following features:

    • It can recursively search the target directory and its subdirectories
    • Supports extracting three types of files:
      • image: Supports png/jpg/jpeg/webp/svg/apng image types. Image files will be converted to Starwhale.Image type.
      • video: Supports mp4/webm/avi video types. Video files will be converted to Starwhale.Video type.
      • audio: Supports mp3/wav audio types. Audio files will be converted to Starwhale.Audio type.
    • Each file corresponds to one record in the dataset, with the file stored in the file field.
    • If auto_label=True, the parent directory name will be used as the label for that record, stored in the label field. Files in the root directory will not be labeled.
    • If a txt file with the same name as an image/video/audio file exists, its content will be stored as the caption field in the dataset.
    • If metadata.csv or metadata.jsonl exists in the root directory, their content will be read automatically and associated to records by file path as meta information in the dataset.
      • metadata.csv and metadata.jsonl are mutually exclusive. An exception will be thrown if both exist.
      • Each record in metadata.csv and metadata.jsonl must contain a file_name field pointing to the file path.
      • metadata.csv and metadata.jsonl are optional for dataset building.
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • folder: (str|Path, required)
      • The folder path from which you would like to create this dataset.
    • kind: (str|DatasetFolderSourceType, required)
      • The dataset source type you would like to use, the choices are: image, video and audio.
      • Recursively searching for files of the specified kind in folder. Other file types will be ignored.
    • name: (str|Resource, optional)
      • The dataset name you would like to use.
      • If not specified, the name is the folder name.
    • auto_label: (bool, optional)
      • Whether to auto label by the sub-folder name.
      • The default value is True.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    • Example for the normal function calling

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • Example for caption

      folder/dog/1.png
      folder/dog/1.txt

      1.txt content will be used as the caption of 1.png.

    • Example for metadata

      metadata.csv:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • Example for auto-labeling

      The following structure will create a dataset with 2 labels: "cat" and "dog", 4 images in total.

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

    __iter__

    __iter__ a method that iter the dataset rows.

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label and img are the features of mnist.
    print(item.features.img)

    batch_iter

    batch_iter is a method that iter the dataset rows in batch.

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    Parameters

    • batch_size: (int, optional)
      • batch size. The default value is 1.
    • drop_not_full: (bool, optional)
      • Whether to discard the last batch of data when its size is smaller than batch_size.
      • The default value is False.

    Examples

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ is a method that allows retrieving certain rows of data from the dataset, with usage similar to Python dict and list types.

    from starwhale import dataset

    ds = dataset("mock-int-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-str-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ is a method that allows updating rows of data in the dataset, with usage similar to Python dicts. __setitem__ supports multi-threaded parallel data insertion.

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    Parameters

    • key: (int|str, required)
      • key is the index for each row in the dataset. The type is int or str, but a dataset only accepts one type.
    • value: (DataRow|tuple|dict, required)
      • value is the features for each row in the dataset, using a Python dict is generally recommended.

    Examples

    • Normal insertion

    Insert two rows into the test dataset, with index test and test2 respectively:

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • Parallel insertion
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ is a method to delete certain rows of data from the dataset.

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append is a method to append data to a dataset, similar to the append method for Python lists.

    • Adding features dict, each row is automatically indexed with int starting from 0 and incrementing.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • By appending the index and features dictionary, the index of each data row in the dataset will not be handled automatically.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend is a method to bulk append data to a dataset, similar to the extend method for Python lists.

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit is a method that flushes the current cached data to storage when called, and generates a dataset version. This version can then be used to load the corresponding dataset content afterwards.

    For a dataset, if some data is added without calling commit, but close is called or the process exits directly instead, the data will still be written to the dataset, just without generating a new version.

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    Parameters

    • tags: (list(str), optional)
      • tag as a list
    • message: (str, optional)
      • commit message. The default value is empty.
    • force_add_tags: (bool, optional)
      • For server/cloud instances, when adding labels to this version, if a label has already been applied to other dataset versions, you can use the force_add_tags=True parameter to forcibly add the label to this version, otherwise an exception will be thrown.
      • The default is False.
    • ignore_add_tags_errors: (bool, optional)
      • Ignore any exceptions thrown when adding labels.
      • The default is False.

    Examples

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly is a property attribute indicating if the dataset is read-only, it returns a bool value.

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version is a property attribute, string type.

    • When loading an existing dataset, the loading_version is the related dataset version.
    • When creating a non-existed dataset, the loading_version is equal to the pending_commit_version.

    pending_commit_version

    pending_commit_version is a property attribute, string type. When you call the commit function, the pending_commit_version will be recorded in the Standalone instance, Server instance, or Cloud instance.

    committed_version

    committed_version is a property attribute, string type. After the commit function is called, the committed_version becomes available and is equal to the pending_commit_version. Accessing this attribute without calling commit first will raise an exception.

    remove

    remove is a method equivalent to the swcli dataset remove command, it can delete a dataset.

    def remove(self, force: bool = False) -> None:

    recover

    recover is a method equivalent to the swcli dataset recover command, it can recover a soft-deleted dataset that has not yet been garbage collected.

    def recover(self, force: bool = False) -> None:

    summary

    summary is a method equivalent to the swcli dataset summary command, it returns summary information of the dataset.

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history is a method equivalent to the swcli dataset history command, it returns the history records of the dataset.

    def history(self) -> t.List[t.Dict]:

    flush

    flush is a method that flushes temporarily cached data from memory to persistent storage. The commit and close methods will automatically call flush.

    close

    close is a method that closes opened connections related to the dataset. Dataset also implements contextmanager, so datasets can be automatically closed using with syntax without needing to explicitly call close.

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head

    head is a method to show the first n rows of a dataset, equivalent to the swcli dataset head command.

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> List[DataRow]:

    fetch_one

    fetch_one is a method to get the first record in a dataset, similar to head(n=1)[0].

    list

    list is a class method to list Starwhale datasets under a project URI, equivalent to the swcli dataset list command.

    @classmethod
    def list(
    cls,
    project_uri: Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[DatasetListType, Dict[str, Any]]:

    copy

    copy is a method to copy a dataset to another instance, equivalent to the swcli dataset copy command.

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • When copying a remote dataset to the local instance, this parameter can be set to specify the target Project URI.
    • force: (bool, optional)
      • Whether to forcibly overwrite the dataset if there is already one with the same version on the target instance.
      • The default value is False.
      • When the tags are already used for another dataset version in the dest instance, you should use the force option or adjust the tags.
    • mode: (str, optional)
      • Dataset copy mode, default is 'patch'. Mode choices are: 'patch', 'overwrite'.
      • patch: Patch mode, only update the changed rows and columns for the remote dataset.
      • overwrite: Overwrite mode, update records and delete extraneous rows from the remote dataset.
    • ignore_tags (List[str], optional)
      • Ignore tags when copying.
      • By default, the dataset is copied with all user custom tags.
      • latest and ^v\d+$ are the system builtin tags, they are ignored automatically.

    Examples

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch is a method that can convert a Starwhale dataset to a Pytorch torch.utils.data.Dataset, which can then be passed to torch.utils.data.DataLoader for use.

    It should be noted that the to_pytorch function returns a Pytorch IterableDataset.

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    Parameters

    • transform: (callable, optional)
      • A transform function for input data.
    • drop_index: (bool, optional)
      • Whether to drop the index column.
    • skip_default_transform: (bool, optional)
      • If transform is not set, by default the built-in Starwhale transform function will be used to transform the data. This can be disabled with the skip_default_transform parameter.

    Examples

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow is a method that can convert a Starwhale dataset to a Tensorflow tensorflow.data.Dataset.

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    Parameters

    • drop_index: (bool, optional)
      • Whether to drop the index column.

    Examples

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config is a method to set blob-related attributes in a Starwhale dataset. It needs to be called before making data changes.

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    Parameters

    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.

    Examples

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config is a method to set parameters for the Starwhale dataset loader process.

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    Parameters

    • num_workers: (int, optional)
      • The workers number for loading dataset.
      • The default value is 2.
    • cache_size: (int, optional)
      • Prefetched data rows.
      • The default value is 20.
    • field_transformer: (dict, optional)
      • features name transform dict.

    Examples

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/next/reference/sdk/evaluation/index.html b/next/reference/sdk/evaluation/index.html index c72ac477e..bc8d4d52e 100644 --- a/next/reference/sdk/evaluation/index.html +++ b/next/reference/sdk/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Model Evaluation SDK

    @evaluation.predict

    The @evaluation.predict decorator defines the inference process in the Starwhale Model Evaluation, similar to the map phase in MapReduce. It contains the following core features:

    • On the Server instance, require the resources needed to run.
    • Automatically read the local or remote datasets, and pass the data in the datasets one by one or in batches to the function decorated by evaluation.predict.
    • By the replicas setting, implement distributed dataset consumption to horizontally scale and shorten the time required for the model evaluation tasks.
    • Automatically store the return values of the function and the input features of the dataset into the results table, for display in the Web UI and further use in the evaluate phase.
    • The decorated function is called once for each single piece of data or each batch, to complete the inference process.

    Parameters

    • resources: (dict, optional)
      • Defines the resources required by each predict task when running on the Server instance, including memory, cpu, and nvidia.com/gpu.
      • memory: The unit is Bytes, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"memory": {"request": 100 * 1024, "limit": 200 * 1024}}.
        • If only a single number is set, the Python SDK will automatically set request and limit to the same value, e.g. resources={"memory": 100 * 1024} is equivalent to resources={"memory": {"request": 100 * 1024, "limit": 100 * 1024}}.
      • cpu: The unit is the number of CPU cores, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"cpu": {"request": 1, "limit": 2}}.
        • If only a single number is set, the SDK will automatically set request and limit to the same value, e.g. resources={"cpu": 1.5} is equivalent to resources={"cpu": {"request": 1.5, "limit": 1.5}}.
      • nvidia.com/gpu: The unit is the number of GPUs, int type is supported.
        • nvidia.com/gpu does not support setting request and limit, only a single number is supported.
      • Note: The resources parameter currently only takes effect on the Server instances. For the Cloud instances, the same can be achieved by selecting the corresponding resource pool when submitting the evaluation task. Standalone instances do not support this feature at all.
    • replicas: (int, optional)
      • The number of replicas to run predict.
      • predict defines a Step, in which there are multiple equivalent Tasks. Each Task runs on a Pod in Cloud/Server instances, and a Thread in Standalone instances.
      • When multiple replicas are specified, they are equivalent and will jointly consume the selected dataset to achieve distributed dataset consumption. It can be understood that a row in the dataset will only be read by one predict replica.
      • The default is 1.
    • batch_size: (int, optional)
      • Batch size for passing data from the dataset into the function.
      • The default is 1.
    • fail_on_error: (bool, optional)
      • Whether to interrupt the entire model evaluation when the decorated function throws an exception. If you expect some "exceptional" data to cause evaluation failures but don't want to interrupt the overall evaluation, you can set fail_on_error=False.
      • The default is True.
    • auto_log: (bool, optional)
      • Whether to automatically log the return values of the function and the input features of the dataset to the results table.
      • The default is True.
    • log_mode: (str, optional)
      • When auto_log=True, you can set log_mode to define logging the return values in plain or pickle format.
      • The default is pickle.
    • log_dataset_features: (List[str], optional)
      • When auto_log=True, you can selectively log certain features from the dataset via this parameter.
      • By default, all features will be logged.
    • needs: (List[Callable], optional)
      • Defines the prerequisites for this task to run, can use the needs syntax to implement DAG.
      • needs accepts functions decorated by @evaluation.predict, @evaluation.evaluate, and @handler.
      • The default is empty, i.e. does not depend on any other tasks.

    Input

    The decorated functions need to define some input parameters to accept dataset data, etc. They contain the following patterns:

    • data:

      • data is a dict type that can read the features of the dataset.
      • When batch_size=1 or batch_size is not set, the label feature can be read through data['label'] or data.label.
      • When batch_size is set to > 1, data is a list.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external:

      • data is a dict type that can read the features of the dataset.
      • external is also a dict, including: index, index_with_dataset, dataset_info, context and dataset_uri keys. The attributes can be used for the further fine-grained processing.
        • index: The index of the dataset row.
        • index_with_dataset: The index with the dataset info.
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo Class.
        • context: starwhale.Context Class.
        • dataset_uri: starwhale.base.uri.resource.Resource Class.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw:

      • data is a dict type that can read the features of the dataset.
      • kw is a dict that contains external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs:

      • The first argument of args list is data.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args:

      • *args does not contain external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    Examples

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "memory": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate is a decorator that defines the evaluation process in the Starwhale Model evaluation, similar to the reduce phase in MapReduce. It contains the following core features:

    • On the Server instance, apply for the resources.
    • Read the data recorded in the results table automatically during the predict phase, and pass it into the function as an iterator.
    • The evaluate phase will only run one replica, and cannot define the replicas parameter like the predict phase.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
      • In the common case, it will depend on a function decorated by @evaluation.predict.
    • use_predict_auto_log: (bool, optional)
      • Defaults to True, passes an iterator that can traverse the predict results to the function.

    Input

    • When use_predict_auto_log=True (default), pass an iterator that can traverse the predict results into the function.
      • The iterated object is a dictionary containing two keys: output and input.
        • output is the element returned by the predict stage function.
        • input is the features of the corresponding dataset during the inference process, which is a dictionary type.
    • When use_predict_auto_log=False, do not pass any parameters into the function.

    Examples

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    class Evaluation

    starwhale.Evaluation implements the abstraction for Starwhale Model Evaluation, and can perform operations like logging and scanning for Model Evaluation on Standalone/Server/Cloud instances, to record and retrieve metrics.

    __init__

    __init__ function initializes Evaluation object.

    class Evaluation
    def __init__(self, id: str, project: Project | str) -> None:

    Parameters

    • id: (str, required)
      • The UUID of Model Evaluation that is generated by Starwhale automatically.
    • project: (Project|str, required)
      • Project object or Project URI str.

    Example

    from starwhale import Evaluation

    standalone_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="self")
    server_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="cloud://server/project/starwhale:starwhale")
    cloud_e = Evaluation("2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/project/starwhale:llm-leaderboard")

    from_context

    from_context is a classmethod that obtains the Evaluation object under the current Context. from_context can only take effect under the task runtime environment. Calling this method in a non-task runtime environment will raise a RuntimeError exception, indicating that the Starwhale Context has not been properly set.

    @classmethod
    def from_context(cls) -> Evaluation:

    Example

    from starwhale import Evaluation

    with Evaluation.from_context() as e:
    e.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})

    log

    log is a method that logs evaluation metrics to a specific table, which can then be viewed on the Server/Cloud instance's web page or through the scan method.

    def log(
    self, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    Parameters

    • category: (str, required)
      • The category of the logged metrics, which will be used as the suffix of the Starwhale Datastore table name.
      • Each category corresponds to a Starwhale Datastore table. These tables will be isolated by the evaluation task ID and will not affect each other.
    • id: (str|int, required)
      • The ID of the logged record, unique within the table.
      • For the same table, only str or int can be used as the ID type.
    • metrics: (dict, required)
      • A dict to log metrics in key-value format.
      • Keys are of str type.
      • Values can be constant types like int, float, str, bytes, bool, or compound types like tuple, list, dict. It also supports logging Artifacts types like Starwhale.Image, Starwhale.Video, Starwhale.Audio, Starwhale.Text, Starwhale.Binary.
        • When the value contains dict type, the Starwhale SDK will automatically flatten the dict for better visualization and metric comparison.
        • For example, if metrics is {"test": {"loss": 0.99, "prob": [0.98,0.99]}, "image": [Image, Image]}, it will be stored as {"test/loss": 0.99, "test/prob": [0.98, 0.99], "image/0": Image, "image/1": Image} after flattening.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation.from_context()

    evaluation_store.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log("ppl", "1", {"a": "test", "b": 1})

    scan

    scan is a method that returns an iterator for reading data from certain model evaluation tables.

    def scan(
    self,
    category: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    Parameters

    • category: (str, required)
      • Same meaning as the category parameter in the log method.
    • start: (Any, optional)
      • Start key, if not specified, start from the first data item in the table.
    • end: (Any, optional)
      • End key, if not specified, iterate to the end of the table.
    • keep_none: (bool, optional)
      • Whether to return columns with None values, not returned by default.
    • end_inclusive: (bool, optional)
      • Whether to include the row corresponding to end, not included by default.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    results = [data for data in evaluation_store.scan("label/0")]

    flush

    flush is a method that can immediately flush the metrics logged by the log method to the datastore and oss storage. If the flush method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush(self, category: str, artifacts_flush: bool = True) -> None

    Parameters

    • category: (str, required)
      • Same meaning as the category parameter in the log method.
    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.

    log_result

    log_result is a method that logs evaluation metrics to the results table, equivalent to calling the log method with category set to results. The results table is generally used to store inference results. By default, @evaluation.predict will store the return value of the decorated function in the results table, you can also manually store using log_result.

    def log_result(self, id: t.Union[str, int], metrics: t.Dict[str, t.Any]) -> None:

    Parameters

    • id: (str|int, required)
      • The ID of the record, unique within the results table.
      • For the results table, only str or int can be used as the ID type.
    • metrics: (dict, required)
      • Same definition as the metrics parameter in the log method.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")
    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})

    scan_results

    scan_results is a method that returns an iterator for reading data from the results table.

    def scan_results(
    self,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    Parameters

    • start: (Any, optional)
      • Start key, if not specified, start from the first data item in the table.
      • Same definition as the start parameter in the scan method.
    • end: (Any, optional)
      • End key, if not specified, iterate to the end of the table.
      • Same definition as the end parameter in the scan method.
    • keep_none: (bool, optional)
      • Whether to return columns with None values, not returned by default.
      • Same definition as the keep_none parameter in the scan method.
    • end_inclusive: (bool, optional)
      • Whether to include the row corresponding to end, not included by default.
      • Same definition as the end_inclusive parameter in the scan method.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")

    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})
    results = [data for data in evaluation_store.scan_results()]

    flush_results

    flush_results is a method that can immediately flush the metrics logged by the log_result method to the datastore and oss storage. If the flush_results method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_results(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    log_summary

    log_summary is a method that logs certain metrics to the summary table. The evaluation page on Server/Cloud instances displays data from the summary table.

    Each time it is called, Starwhale will automatically update with the unique ID of this evaluation as the row ID of the table. This function can be called multiple times during one evaluation to update different columns.

    Each project has one summary table. All evaluation tasks under that project will write summary information to this table for easy comparison between evaluations of different models.

    def log_summary(self, *args: t.Any, **kw: t.Any) -> None:

    Same as log method, log_summary will automatically flatten the dict.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")

    evaluation_store.log_summary(loss=0.99)
    evaluation_store.log_summary(loss=0.99, accuracy=0.99)
    evaluation_store.log_summary({"loss": 0.99, "accuracy": 0.99})

    get_summary

    get_summary is a method that returns the information logged by log_summary.

    def get_summary(self) -> t.Dict:

    flush_summary

    flush_summary is a method that can immediately flush the metrics logged by the log_summary method to the datastore and oss storage. If the flush_summary method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_summary(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    flush_all

    flush_all is a method that can immediately flush the metrics logged by log, log_results, log_summary methods to the datastore and oss storage. If the flush_all method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_all(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    get_tables

    get_tables is a method that returns the names of all tables generated during model evaluation. Note that this function does not return the summary table name.

    def get_tables(self) -> t.List[str]:

    close

    close is a method to close the Evaluation object. close will automatically flush data to storage when called. Evaluation also implements __enter__ and __exit__ methods, which can simplify manual close calls using with syntax.

    def close(self) -> None:

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    evaluation_store.log_summary(loss=0.99)
    evaluation_store.close()

    # auto close when the with-context exits.
    with Evaluation.from_context() as e:
    e.log_summary(loss=0.99)

    @handler

    @handler is a decorator that provides the following functionalities:

    • On a Server instance, it requests the required resources to run.
    • It can control the number of replicas.
    • Multiple handlers can form a DAG through dependency relationships to control the execution workflow.
    • It can expose ports externally to run like a web handler.

    @fine_tune, @evaluation.predict and @evaluation.evaluate can be considered applications of @handler in certain specific areas. @handler is the underlying implementation of these decorators and is more fundamental and flexible.

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
    • replicas: (int, optional)
      • Consistent with the replicas parameter definition in @evaluation.predict.
    • name: (str, optional)
      • The name displayed for the handler.
      • If not specified, use the decorated function's name.
    • expose: (int, optional)
      • The port exposed externally. When running a web handler, the exposed port needs to be declared.
      • The default is 0, meaning no port is exposed.
      • Currently only one port can be exposed.
    • require_dataset: (bool, optional)
      • Defines whether this handler requires a dataset when running.
      • If require_dataset=True, the user is required to input a dataset when creating an evaluation task on the Server/Cloud instance web page. If require_dataset=False, the user does not need to specify a dataset on the web page.
      • The default is False.

    Examples

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune is a decorator that defines the fine-tuning process for model training.

    Some restrictions and usage suggestions:

    • fine_tune has only one replica.
    • fine_tune requires dataset input.
    • Generally, the dataset is obtained through Context.get_runtime_context() at the start of fine_tune.
    • Generally, at the end of fine_tune, the fine-tuned Starwhale model package is generated through starwhale.model.build, which will be automatically copied to the corresponding evaluation project.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.

    Examples

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    The @multi_classification decorator uses the sklearn lib to analyze results for multi-classification problems, outputting the confusion matrix, ROC, AUC etc., and writing them to related tables in the Starwhale Datastore.

    When using it, certain requirements are placed on the return value of the decorated function, which should be (label, result) or (label, result, probability_matrix).

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    Parameters

    • confusion_matrix_normalize: (str, optional)
      • Accepts three parameters:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • Whether to calculate the Hamming loss.
      • The default is True.
    • show_cohen_kappa_score: (bool, optional)
      • Whether to calculate the Cohen kappa score.
      • The default is True.
    • show_roc_auc: (bool, optional)
      • Whether to calculate ROC/AUC. To calculate, the function needs to return a (label, result, probability_matrix) tuple, otherwise a (label, result) tuple is sufficient.
      • The default is True.
    • all_labels: (List, optional)
      • Defines all the labels.

    Examples


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    The PipelineHandler class provides a default model evaluation workflow definition that requires users to implement the predict and evaluate functions.

    The PipelineHandler is equivalent to using the @evaluation.predict and @evaluation.evaluate decorators together - the usage looks different but the underlying model evaluation process is the same.

    Note that PipelineHandler currently does not support defining resources parameters.

    Users need to implement the following functions:

    • predict: Defines the inference process, equivalent to a function decorated with @evaluation.predict.

    • evaluate: Defines the evaluation process, equivalent to a function decorated with @evaluation.evaluate.

    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    Parameters

    • predict_batch_size: (int, optional)
      • Equivalent to the batch_size parameter in @evaluation.predict.
      • Default is 1.
    • ignore_error: (bool, optional)
      • Equivalent to the fail_on_error parameter in @evaluation.predict.
      • Default is False.
    • predict_auto_log: (bool, optional)
      • Equivalent to the auto_log parameter in @evaluation.predict.
      • Default is True.
    • predict_log_mode: (str, optional)
      • Equivalent to the log_mode parameter in @evaluation.predict.
      • Default is pickle.
    • predict_log_dataset_features: (bool, optional)
      • Equivalent to the log_dataset_features parameter in @evaluation.predict.
      • Default is None, which records all features.

    PipelineHandler.run Decorator

    The PipelineHandler.run decorator can be used to describe resources for the predict and evaluate methods, supporting definitions of replicas and resources:

    • The PipelineHandler.run decorator can only decorate predict and evaluate methods in subclasses inheriting from PipelineHandler.
    • The predict method can set the replicas parameter. The replicas value for the evaluate method is always 1.
    • The resources parameter is defined and used in the same way as the resources parameter in @evaluation.predict or @evaluation.evaluate.
    • The PipelineHandler.run decorator is optional.
    • The PipelineHandler.run decorator only takes effect on Server and Cloud instances; it has no effect on Standalone instances, which do not support resource definition.
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    Examples

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    The context information passed during model evaluation, including Project, Task ID, etc. The Context content is automatically injected and can be used in the following ways:

    • Inherit the PipelineHandler class and use the self.context object.
    • Get it through Context.get_runtime_context().

    Note that Context can only be used during model evaluation, otherwise the program will throw an exception.

    Currently Context can get the following values:

    • project: str
      • Project name.
    • version: str
      • Unique ID of model evaluation.
    • step: str
      • Step name.
    • total: int
      • Total number of Tasks under the Step.
    • index: int
      • Task index number, starting from 0.
    • dataset_uris: List[str]
      • List of Starwhale dataset URIs.

    Examples


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api is a decorator that provides a simple Web Handler input definition based on Gradio for accepting external requests and returning inference results to the user when launching a Web Service with the swcli model serve command, enabling online evaluation.

    Examples

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    If you want to customize the web service implementation, you can subclass Service and override the serve method.

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    Notes:

    • Handlers added via PipelineHandler.add_api, the api decorator, and Service.api can work together
    • If using a custom Service, you need to instantiate the custom Service class in the model

    Custom Request and Response

    Request and Response are handler preprocessing and postprocessing classes for receiving user requests and returning results. They can be simply understood as pre and post logic for the handler.

    Starwhale provides built-in Request implementations for Dataset types and Json Response. Users can also customize the logic as follows:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req

    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/next/reference/sdk/job/index.html b/next/reference/sdk/job/index.html index 07073b6ee..c57a995f0 100644 --- a/next/reference/sdk/job/index.html +++ b/next/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Job SDK

    job

    Get a starwhale.Job object through the Job URI parameter, which represents a Job on Standalone/Server/Cloud instances.

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    Parameters

    • uri: (str, required)
      • Job URI format.

    Usage Example

    from starwhale import job

    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")

    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job abstracts Starwhale Job and enables some information retrieval operations on the job.

    list

    list is a classmethod that can list the jobs under a project.

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[List[Job], Dict]:

    Parameters

    • project: (str, optional)
      • Project URI, can be projects on Standalone/Server/Cloud instances.
      • If project is not specified, the project selected by the swcli project select command will be used.
    • page_index: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the page number.
        • Default is 1.
        • Page numbers start from 1.
      • Standalone instances do not support paging. This parameter has no effect.
    • page_size: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the number of jobs returned per page.
        • Defaults to DEFAULT_PAGE_SIZE (see the method signature).
      • Standalone instances do not support paging. This parameter has no effect.

    Usage Example

    from starwhale import Job

    # list jobs of current selected project
    jobs, pagination_info = Job.list()

    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")

    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get is a classmethod that gets information about a specific job and returns a starwhale.Job object. It has the same functionality and parameter definitions as the starwhale.job function.

    Usage Example

    from starwhale import Job

    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")

    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary is a property that returns the data written to the summary table during the job execution, in dict type.

    @property
    def summary(self) -> Dict[str, Any]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.summary)

    tables

    tables is a property that returns the names of tables created during the job execution (not including the summary table, which is created automatically at the project level), in list type.

    @property
    def tables(self) -> List[str]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.tables)

    get_table_rows

    get_table_rows is a method that returns records from a data table according to the table name and other parameters, in iterator type.

    def get_table_rows(
    self,
    name: str,
    start: Any = None,
    end: Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> Iterator[Dict[str, Any]]:

    Parameters

    • name: (str, required)
      • Datastore table name. Any of the table names obtained through the tables property can be used.
    • start: (Any, optional)
      • The starting ID value of the returned records.
      • Default is None, meaning start from the beginning of the table.
    • end: (Any, optional)
      • The ending ID value of the returned records.
      • Default is None, meaning until the end of the table.
      • If both start and end are None, all records in the table will be returned as an iterator.
    • keep_none: (bool, optional)
      • Whether to return records with None values.
      • Default is False.
    • end_inclusive: (bool, optional)
      • When end is set, whether the iteration includes the end record.
      • Default is False.

    Usage Example

    from starwhale import job

    j = job("local/project/self/job/xm5wnup")

    table_name = j.tables[0]

    for row in j.get_table_rows(table_name):
    print(row)

    rows = list(j.get_table_rows(table_name, start=0, end=100))

    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]

    status

    status is a property that returns the current real-time state of the Job as a string. The possible states are CREATED, READY, PAUSED, RUNNING, CANCELLING, CANCELED, SUCCESS, FAIL, and UNKNOWN.

    @property
    def status(self) -> str:

    create

    create is a classmethod that can create tasks on a Standalone instance or Server/Cloud instance, including tasks for Model Evaluation, Fine-tuning, Online Serving, and Developing. The function returns a Job object.

    • create determines which instance the generated task runs on through the project parameter, including Standalone and Server/Cloud instances.
    • On a Standalone instance, create creates a synchronously executed task.
    • On a Server/Cloud instance, create creates an asynchronously executed task.
    @classmethod
    def create(
    cls,
    project: Project | str,
    model: Resource | str,
    run_handler: str,
    datasets: t.List[str | Resource] | None = None,
    runtime: Resource | str | None = None,
    resource_pool: str = DEFAULT_RESOURCE_POOL,
    ttl: int = 0,
    dev_mode: bool = False,
    dev_mode_password: str = "",
    dataset_head: int = 0,
    overwrite_specs: t.Dict[str, t.Any] | None = None,
    ) -> Job:

    Parameters

    Parameters apply to all instances:

    • project: (Project|str, required)
      • A Project object or Project URI string.
    • model: (Resource|str, required)
      • Model URI string or Resource object of Model type, representing the Starwhale model package to run.
    • run_handler: (str, required)
      • The name of the runnable handler in the Starwhale model package, e.g. the evaluate handler of mnist: mnist.evaluator:MNISTInference.evaluate.
    • datasets: (List[str | Resource], optional)
      • Datasets required for the Starwhale model package to run, not required.

    Parameters only effective for Standalone instances:

    • dataset_head: (int, optional)
      • Generally used for debugging scenarios; only the first N records of the dataset are consumed by the Starwhale model.

    Parameters only effective for Server/Cloud instances:

    • runtime: (Resource | str, optional)
      • Runtime URI string or Resource object of Runtime type, representing the Starwhale runtime required to run the task.
      • When not specified, it will try to use the built-in runtime of the Starwhale model package.
      • When creating tasks under a Standalone instance, the Python interpreter environment used by the Python script is used as its own runtime. Specifying a runtime via the runtime parameter is not supported. If you need to specify a runtime, you can use the swcli model run command.
    • resource_pool: (str, optional)
      • Specify which resource pool the task runs in, default to the default resource pool.
    • ttl: (int, optional)
      • Maximum lifetime of the task, will be killed after timeout.
      • The unit is seconds.
      • By default, ttl is 0, meaning no timeout limit, and the task will run as expected.
      • When ttl is less than 0, it also means no timeout limit.
    • dev_mode: (bool, optional)
      • Whether to set debug mode. After turning on this mode, you can enter the related environment through VSCode Web.
      • Debug mode is off by default.
    • dev_mode_password: (str, optional)
      • Login password for VSCode Web in debug mode.
      • Default is empty, in which case the task's UUID will be used as the password, which can be obtained via job.info().job.uuid.
    • overwrite_specs: (Dict[str, Any], optional)
      • Support setting the replicas and resources fields of the handler.
      • If empty, use the values set in the corresponding handler of the model package.
      • The key of overwrite_specs is the name of the handler, e.g. the evaluate handler of mnist: mnist.evaluator:MNISTInference.evaluate.
      • The value of overwrite_specs is the set value, in dictionary format, supporting settings for replicas and resources, e.g. {"replicas": 1, "resources": {"memory": "1GiB"}}.

    Examples

    • create a Cloud Instance job
    from starwhale import Job
    project = "https://cloud.starwhale.cn/project/starwhale:public"
    job = Job.create(
    project=project,
    model=f"{project}/model/mnist/version/v0",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=[f"{project}/dataset/mnist/version/v0"],
    runtime=f"{project}/runtime/pytorch",
    overwrite_specs={"mnist.evaluator:MNISTInference.evaluate": {"resources": "4GiB"},
    "mnist.evaluator:MNISTInference.predict": {"resources": "8GiB", "replicas": 10}}
    )
    print(job.status)
    • create a Standalone Instance job
    from starwhale import Job
    job = Job.create(
    project="self",
    model="mnist",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=["mnist"],
    )
    print(job.status)
    - + \ No newline at end of file diff --git a/next/reference/sdk/model/index.html b/next/reference/sdk/model/index.html index d1cb83a0f..722240e55 100644 --- a/next/reference/sdk/model/index.html +++ b/next/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Model SDK

    model.build

    model.build is a function that can build the Starwhale model, equivalent to the swcli model build command.

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • modules: (List[str|object], optional)
      • The search modules supports object(function, class or module) or str(example: "to.path.module", "to.path.module:object").
      • If the argument is not specified, the search modules are the imported modules.
    • name: (str, optional)
      • Starwhale Model name.
      • The default is the current work dir (cwd) name.
    • workdir: (str, Pathlib.Path, optional)
      • The path of the rootdir. The default workdir is the current working dir.
      • All files in the workdir will be packaged. If you want to ignore some files, you can add .swignore file in the workdir.
    • project_uri: (str, optional)
      • The project uri of the Starwhale Model.
      • If the argument is not specified, the project_uri is the config value of swcli project select command.
    • desc: (str, optional)
      • The description of the Starwhale Model.
    • remote_project_uri: (str, optional)
      • Project URI of another instance. After the Starwhale model is built, it will be automatically copied to the remote instance.
    • add_all: (bool, optional)
      • Add all files in the working directory to the model package (excludes python cache files and virtual environment files when disabled). The .swignore file still takes effect.
      • The default value is False.
    • tags: (List[str], optional)
      • The tags for the model version.
      • latest and ^v\d+$ tags are reserved tags.

    Examples

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/next/reference/sdk/other/index.html b/next/reference/sdk/other/index.html index d4ce6d2d4..a8beb184e 100644 --- a/next/reference/sdk/other/index.html +++ b/next/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Other SDK

    __version__

    Version of Starwhale Python SDK and swcli, string constant.

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    Initialize Starwhale logger and traceback. The default value is 0.

    • 0: show only errors, traceback only shows 1 frame.
    • 1: show errors + warnings, traceback shows 5 frames.
    • 2: show errors + warnings + info, traceback shows 10 frames.
    • 3: show errors + warnings + info + debug, traceback shows 100 frames.
    • >=4: show errors + warnings + info + debug + trace, traceback shows 1000 frames.
    def init_logger(verbose: int = 0) -> None:

    login

    Log in to a server/cloud instance. It is equivalent to running the swcli instance login command. Logging in to a Standalone instance is meaningless.

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    Parameters

    • instance: (str, required)
      • The http url of the server/cloud instance.
    • alias: (str, optional)
      • An alias for the instance to simplify the instance part of the Starwhale URI.
      • If not specified, the hostname part of the instance http url will be used.
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • You can only choose one of username + password or token to login to the instance.

    Examples

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    Log out of a server/cloud instance. It is equivalent to running the swcli instance logout command. Logging out of a Standalone instance is meaningless.

    def logout(instance: str) -> None:

    Examples

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/next/reference/sdk/overview/index.html b/next/reference/sdk/overview/index.html index 50ba9a0a3..38b0c122c 100644 --- a/next/reference/sdk/overview/index.html +++ b/next/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Python SDK Overview

    Starwhale provides a series of Python SDKs to help manage datasets, models, evaluations etc. Using the Starwhale Python SDK can make it easier to complete your ML/DL development tasks.

    Classes

    • PipelineHandler: Provides default model evaluation process definition, requires implementation of predict and evaluate methods.
    • Context: Passes context information during model evaluation, including Project, Task ID etc.
    • class Dataset: Starwhale Dataset class.
    • class starwhale.api.service.Service: The base class of online evaluation.
    • class Job: Starwhale Job class.
    • class Evaluation: Starwhale Evaluation class.

    Functions

    • @multi_classification: Decorator for multi-class problems to simplify evaluate result calculation and storage for better evaluation presentation.
    • @handler: Decorator to define a running entity with resource attributes (mem/cpu/gpu). You can control replica count. Handlers can form DAGs through dependencies to control execution flow.
    • @evaluation.predict: Decorator to define inference process in model evaluation, similar to map phase in MapReduce.
    • @evaluation.evaluate: Decorator to define evaluation process in model evaluation, similar to reduce phase in MapReduce.
    • model.build: Build Starwhale model.
    • @fine_tune: Decorator to define model fine-tuning process.
    • init_logger: Set log level, implement 5-level logging.
    • dataset: Get starwhale.Dataset object, by creating new datasets or loading existing datasets.
    • @starwhale.api.service.api: Decorator to provide a simple Web Handler input definition based on Gradio.
    • login: Log in to the server/cloud instance.
    • logout: Log out of the server/cloud instance.
    • job: Get starwhale.Job object by the Job URI.
    • @PipelineHandler.run: Decorator to define the resources for the predict and evaluate methods in PipelineHandler subclasses.

    Data Types

    • COCOObjectAnnotation: Provides COCO format definitions.
    • BoundingBox: Bounding box type, currently in LTWH format - left_x, top_y, width and height.
    • ClassLabel: Describes the number and types of labels.
    • Image: Image type.
    • GrayscaleImage: Grayscale image type, e.g. MNIST digit images, a special case of Image type.
    • Audio: Audio type.
    • Video: Video type.
    • Text: Text type, default utf-8 encoding, for storing large texts.
    • Binary: Binary type, stored in bytes, for storing large binary content.
    • Line: Line type.
    • Point: Point type.
    • Polygon: Polygon type.
    • Link: Link type, for creating remote-link data.
    • MIMEType: Describes multimedia types supported by Starwhale, used in mime_type attribute of Image, Video etc for better Dataset Viewer.

    Other

    • __version__: Version of Starwhale Python SDK and swcli, string constant.

    Further reading

    - + \ No newline at end of file diff --git a/next/reference/sdk/type/index.html b/next/reference/sdk/type/index.html index 73110daf1..490eb2cee 100644 --- a/next/reference/sdk/type/index.html +++ b/next/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Data Types

    COCOObjectAnnotation

    It provides definitions following the COCO format.

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    | Parameter    | Description |
    |--------------|-------------|
    | id           | Object id, usually a globally incrementing id |
    | image_id     | Image id, usually id of the image |
    | category_id  | Category id, usually id of the class in object detection |
    | segmentation | Object contour representation, Polygon (polygon vertices) or RLE format |
    | area         | Object area |
    | bbox         | Represents bounding box, can be BoundingBox type or list of floats |
    | iscrowd      | 0 indicates a single object, 1 indicates two unseparated objects |

    Examples

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    GrayscaleImage provides a grayscale image type. It is a special case of the Image type, for example the digit images in MNIST.

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    | Parameter    | Description |
    |--------------|-------------|
    | fp           | Image path, IO object, or file content bytes |
    | display_name | Display name shown in Dataset Viewer |
    | shape        | Image width and height, default channel is 1 |
    | as_mask      | Whether used as a mask image |
    | mask_uri     | URI of the original image for the mask |

    Examples

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage Functions

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    BoundingBox provides a bounding box type, currently in LTWH format:

    • left_x: x-coordinate of left edge
    • top_y: y-coordinate of top edge
    • width: width of bounding box
    • height: height of bounding box

    So it represents the bounding box using the coordinates of its left, top, width and height. This is a common format for specifying bounding boxes in computer vision tasks.

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    | Parameter | Description |
    |-----------|-------------|
    | x         | x-coordinate of left edge (left_x) |
    | y         | y-coordinate of top edge (top_y) |
    | width     | Width of bounding box |
    | height    | Height of bounding box |

    ClassLabel

    Describe labels.

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    Image Type.

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    | Parameter    | Description |
    |--------------|-------------|
    | fp           | Image path, IO object, or file content bytes |
    | display_name | Display name shown in Dataset Viewer |
    | shape        | Image width, height and channels |
    | mime_type    | MIMEType supported types |
    | as_mask      | Whether used as a mask image |
    | mask_uri     | URI of the original image for the mask |

    The main difference from GrayscaleImage is that Image supports multi-channel RGB images by specifying shape as (W, H, C).

    Examples

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image Functions

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    Video type.

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    | Parameter    | Description |
    |--------------|-------------|
    | fp           | Video path, IO object, or file content bytes |
    | display_name | Display name shown in Dataset Viewer |
    | mime_type    | MIMEType supported types |

    Examples

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    Audio type.

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    | Parameter    | Description |
    |--------------|-------------|
    | fp           | Audio path, IO object, or file content bytes |
    | display_name | Display name shown in Dataset Viewer |
    | mime_type    | MIMEType supported types |

    Examples

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio Functions

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    Text type, the default encode type is utf-8.

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    | Parameter | Description |
    |-----------|-------------|
    | content   | The text content |
    | encoding  | Encoding format of the text |

    Examples

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text Functions

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    Binary provides a binary data type, stored as bytes.

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    | Parameter | Description |
    |-----------|-------------|
    | fp        | Path, IO object, or file content bytes |
    | mime_type | MIMEType supported types |

    Binary Functions

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link provides a link type to create remote-link datasets in Starwhale.

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    ParameterDescription
    uriURI of the original data, currently supports localFS and S3 protocols
    authLink auth information
    offsetData offset relative to file pointed by uri
    sizeData size
    data_typeActual data type pointed by the link, currently supports Binary, Image, Text, Audio and Video

    Link.astype

    astype() -> Dict[str, t.Any]

    MIMEType

    MIMEType describes the multimedia types supported by Starwhale, implemented using Python Enum. It is used in the mime_type attribute of Image, Video etc to enable better Dataset Viewer support.

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    Line

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/next/reference/swcli/dataset/index.html b/next/reference/swcli/dataset/index.html index 75376124c..465bd0887 100644 --- a/next/reference/swcli/dataset/index.html +++ b/next/reference/swcli/dataset/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    swcli dataset

    Overview

    swcli [GLOBAL OPTIONS] dataset [OPTIONS] <SUBCOMMAND> [ARGS]...

    The dataset command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • head
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • summary
    • tag

    swcli dataset build

    swcli [GLOBAL OPTIONS] dataset build [OPTIONS]

    Build a Starwhale Dataset. This command only supports building standalone datasets.

    Options

    • Data sources options:
    OptionRequiredTypeDefaultsDescription
    -if or --image or --image-folderNStringBuild dataset from image folder, the folder should contain the image files.
    -af or --audio or --audio-folderNStringBuild dataset from audio folder, the folder should contain the audio files.
    -vf or --video or --video-folderNStringBuild dataset from video folder, the folder should contain the video files.
    -h or --handler or --python-handlerNStringBuild dataset from python executor handler, the handler format is [module path]:[class or func name].
    -f or --yaml or --dataset-yamlNdataset.yaml in cwdBuild dataset from dataset.yaml file. Default uses dataset.yaml in the work directory(cwd).
    -jf or --jsonNStringBuild dataset from json or jsonl file, the json or jsonl file option is a json file path or a http downloaded url. The json content structure should be a list[dict] or tuple[dict].
    -hf or --huggingfaceNStringBuild dataset from huggingface dataset, the huggingface option is a huggingface repo name.
    -c or --csvNStringBuild dataset from csv files. The option is a csv file path, dir path or a http downloaded url. The option can be used multiple times.

    Data source options are mutually exclusive, only one option is accepted. If no set, swcli dataset build command will use dataset yaml mode to build dataset with the dataset.yaml in the cwd.

    • Other options:
    OptionRequiredScopeTypeDefaultsDescription
    -pt or --patchone of --patch and --overwriteGlobalBooleanTruePatch mode, only update the changed rows and columns for the existed dataset.
    -ow or --overwriteone of --patch and --overwriteGlobalBooleanFalseOverwrite mode, update records and delete extraneous rows from the existed dataset.
    -n or --nameNGlobalStringDataset name
    -p or --projectNGlobalStringDefault projectProject URI, the default is the current selected project. The dataset will store in the specified project.
    -d or --descNGlobalStringDataset description
    -as or --alignment-sizeNGlobalString128Bswds-bin format dataset: alignment size
    -vs or --volume-sizeNGlobalString64MBswds-bin format dataset: volume size
    -r or --runtimeNGlobalStringRuntime URI
    -w or --workdirNPython Handler ModeStringcwdwork dir to search handler.
    --auto-label/--no-auto-labelNImage/Video/Audio Folder ModeBooleanTrueWhether to auto label by the sub-folder name.
    --field-selectorNJSON File ModeStringThe field from which you would like to extract dataset array items. The field is split by the dot(.) symbol.
    --subsetNHuggingface ModeStringHuggingface dataset subset name. If the subset name is not specified, all subsets will be built.
    --splitNHuggingface ModeStringHuggingface dataset split name. If the split name is not specified, all splits will be built.
    --revisionNHuggingface ModeStringmainVersion of the dataset script to load. Defaults to 'main'. The option value accepts tag name, or branch name, or commit hash.
    --add-hf-info/--no-add-hf-infoNHuggingface ModeBooleanTrueWhether to add huggingface dataset info to the dataset rows, currently support to add subset and split into the dataset rows. Subset uses _hf_subset field name, split uses _hf_split field name.
    --cache/--no-cacheNHuggingface ModeBooleanTrueWhether to use huggingface dataset cache(download + local hf dataset).
    -t or --tagNGlobalStringDataset tags, the option can be used multiple times.
    --encodingNCSV/JSON/JSONL ModeStringfile encoding.
    --dialectNCSV ModeStringexcelThe csv file dialect, the default is excel. Current supports excel, excel-tab and unix formats.
    --delimiterNCSV ModeString,A one-character string used to separate fields for the csv file.
    --quotecharNCSV ModeString"A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters.
    --skipinitialspace/--no-skipinitialspaceNCSV ModeBoolFalseWhether to skip spaces after delimiter for the csv file.
    --strict/--no-strictNCSV ModeBoolFalseWhen True, raise exception Error if the csv is not well formed.

    Examples for dataset building

    #- from dataset.yaml
    swcli dataset build # build dataset from dataset.yaml in the current work directory(pwd)
    swcli dataset build --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, all the involved files are related to the dataset.yaml file.
    swcli dataset build --overwrite --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, and overwrite the existed dataset.
    swcli dataset build --tag tag1 --tag tag2

    #- from handler
    swcli dataset build --handler mnist.dataset:iter_mnist_item # build dataset from mnist.dataset:iter_mnist_item handler, the workdir is the current work directory(pwd).
    # build dataset from mnist.dataset:LinkRawDatasetProcessExecutor handler, the workdir is example/mnist
    swcli dataset build --handler mnist.dataset:LinkRawDatasetProcessExecutor --workdir example/mnist

    #- from image folder
    swcli dataset build --image-folder /path/to/image/folder # build dataset from /path/to/image/folder, search all image type files.

    #- from audio folder
    swcli dataset build --audio-folder /path/to/audio/folder # build dataset from /path/to/audio/folder, search all audio type files.

    #- from video folder
    swcli dataset build --video-folder /path/to/video/folder # build dataset from /path/to/video/folder, search all video type files.

    #- from json/jsonl file
    swcli dataset build --json /path/to/example.json
    swcli dataset build --json http://example.com/example.json
    swcli dataset build --json /path/to/example.json --field-selector a.b.c # extract the json_content["a"]["b"]["c"] field from the json file.
    swcli dataset build --name qald9 --json https://raw.githubusercontent.com/ag-sc/QALD/master/9/data/qald-9-test-multilingual.json --field-selector questions
    swcli dataset build --json /path/to/test01.jsonl --json /path/to/test02.jsonl
    swcli dataset build --json https://modelscope.cn/api/v1/datasets/damo/100PoisonMpts/repo\?Revision\=master\&FilePath\=train.jsonl

    #- from huggingface dataset
    swcli dataset build --huggingface mnist
    swcli dataset build -hf mnist --no-cache
    swcli dataset build -hf cais/mmlu --subset anatomy --split auxiliary_train --revision 7456cfb

    #- from csv files
    swcli dataset build --csv /path/to/example.csv
    swcli dataset build --csv /path/to/example.csv --csv /path/to/example2.csv
    swcli dataset build --csv /path/to/csv-dir
    swcli dataset build --csv http://example.com/example.csv
    swcli dataset build --name product-desc-modelscope --csv https://modelscope.cn/api/v1/datasets/lcl193798/product_description_generation/repo\?Revision\=master\&FilePath\=test.csv --encoding=utf-8-sig

    swcli dataset copy

    swcli [GLOBAL OPTIONS] dataset copy [OPTIONS] <SRC> <DEST>

    dataset copy copies from SRC to DEST.

    SRC and DEST are both dataset URIs.

    When copying Starwhale Dataset, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -p or --patchone of --patch and --overwriteBooleanTruePatch mode, only update the changed rows and columns for the remote dataset.
    -o or --overwriteone of --patch and --overwriteBooleanFalseOverwrite mode, update records and delete extraneous rows from the remote dataset.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for dataset copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a new dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp --patch cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with a dataset name 'mnist-local'
    swcli dataset cp --overwrite cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with a new dataset name 'mnist-cloud'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli dataset cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp local/project/myproject/dataset/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli dataset cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1 --force

    swcli dataset diff

    swcli [GLOBAL OPTIONS] dataset diff [OPTIONS] <DATASET VERSION> <DATASET VERSION>

    dataset diff compares the difference between two versions of the same dataset.

    DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.
    swcli [GLOBAL OPTIONS] dataset head [OPTIONS] <DATASET VERSION>

    Print the first n rows of the dataset. DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    -n or --rowsNInt5Print the first NUM rows of the dataset.
    -srd or --show-raw-dataNBooleanFalseFetch raw data content from objectstore.
    -st or --show-typesNBooleanFalseshow data types.

    Examples for dataset head

    #- print the first 5 rows of the mnist dataset
    swcli dataset head -n 5 mnist

    #- print the first 10 rows of the mnist(v0 version) dataset and show raw data
    swcli dataset head -n 10 mnist/v0 --show-raw-data

    #- print the data types of the mnist dataset
    swcli dataset head mnist --show-types

    #- print the remote cloud dataset's first 5 rows
    swcli dataset head cloud://cloud-cn/project/test/dataset/mnist -n 5

    #- print the first 5 rows in the json format
    swcli -o json dataset head -n 5 mnist

    swcli dataset history

    swcli [GLOBAL OPTIONS] dataset history [OPTIONS] <DATASET>

    dataset history outputs all history versions of the specified Starwhale Dataset.

    DATASET is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli dataset info

    swcli [GLOBAL OPTIONS] dataset info [OPTIONS] <DATASET>

    dataset info outputs detailed information about the specified Starwhale Dataset version.

    DATASET is a dataset URI.

    swcli dataset list

    swcli [GLOBAL OPTIONS] dataset list [OPTIONS]

    dataset list shows all Starwhale Datasets.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include datasets that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Datasets that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of datasets--filter name=mnist
    ownerKey-ValueThe dataset owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli dataset recover

    swcli [GLOBAL OPTIONS] dataset recover [OPTIONS] <DATASET>

    dataset recover recovers previously removed Starwhale Datasets or versions.

    DATASET is a dataset URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Datasets or versions can not be recovered, as well as those are removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Dataset or version with the same name or version id.

    swcli dataset remove

    swcli [GLOBAL OPTIONS] dataset remove [OPTIONS] <DATASET>

    dataset remove removes the specified Starwhale Dataset or version.

    DATASET is a dataset URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Datasets or versions can be recovered by swcli dataset recover before garbage collection. Use the --force option to persistently remove a Starwhale Dataset or version.

    Removed Starwhale Datasets or versions can be listed by swcli dataset list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Dataset or version. It can not be recovered.

    swcli dataset summary

    swcli [GLOBAL OPTIONS]  dataset summary <DATASET>

    Show dataset summary. DATASET is a dataset URI.

    swcli dataset tag

    swcli [GLOBAL OPTIONS] dataset tag [OPTIONS] <DATASET> [TAGS]...

    dataset tag attaches a tag to a specified Starwhale Dataset version. At the same time, tag command also supports list and remove tags. The tag can be used in a dataset URI instead of the version id.

    DATASET is a dataset URI.

    Each dataset version can have any number of tags, but duplicated tag names are not allowed in the same dataset.

    dataset tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another dataset version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for dataset tag

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/next/reference/swcli/index.html b/next/reference/swcli/index.html index 6fe67c6e8..a6d321260 100644 --- a/next/reference/swcli/index.html +++ b/next/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Overview

    Usage

    swcli [OPTIONS] <COMMAND> [ARGS]...
    note

    sw and starwhale are aliases for swcli.

    Global Options

    OptionDescription
    --versionShow the Starwhale Client version
    -v or --verboseShow verbose log, support multi counts for -v args. More -v args, more logs.
    --helpShow the help message.
    caution

    Global options must be put immediately after swcli, and before any command.

    Commands

    - + \ No newline at end of file diff --git a/next/reference/swcli/instance/index.html b/next/reference/swcli/instance/index.html index 7d5fe12d3..09374e0b9 100644 --- a/next/reference/swcli/instance/index.html +++ b/next/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    swcli instance

    Overview

    swcli [GLOBAL OPTIONS] instance [OPTIONS] <SUBCOMMAND> [ARGS]

    The instance command includes the following subcommands:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [GLOBAL OPTIONS] instance info [OPTIONS] <INSTANCE>

    instance info outputs detailed information about the specified Starwhale Instance.

    INSTANCE is an instance URI.

    swcli instance list

    swcli [GLOBAL OPTIONS] instance list [OPTIONS]

    instance list shows all Starwhale Instances.

    swcli instance login

    swcli [GLOBAL OPTIONS] instance login [OPTIONS] <INSTANCE>

    instance login connects to a Server/Cloud instance and makes the specified instance default.

    INSTANCE is an instance URI.

    OptionRequiredTypeDefaultsDescription
    --usernameNStringThe login username.
    --passwordNStringThe login password.
    --tokenNStringThe login token.
    --aliasYStringThe alias of the instance. You can use it anywhere that requires an instance URI.

    --username and --password can not be used together with --token.

    swcli instance logout

    swcli [GLOBAL OPTIONS] instance logout [INSTANCE]

    instance logout disconnects from the Server/Cloud instance, and clears information stored in the local storage.

    INSTANCE is an instance URI. If it is omitted, the default instance is used instead.

    swcli instance use

    swcli [GLOBAL OPTIONS] instance use <INSTANCE>

    instance use makes the specified instance the default.

    INSTANCE is an instance URI.

    - + \ No newline at end of file diff --git a/next/reference/swcli/job/index.html b/next/reference/swcli/job/index.html index e243b81aa..097f8f078 100644 --- a/next/reference/swcli/job/index.html +++ b/next/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    swcli job

    Overview

    swcli [GLOBAL OPTIONS] job [OPTIONS] <SUBCOMMAND> [ARGS]...

    The job command includes the following subcommands:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [GLOBAL OPTIONS] job cancel [OPTIONS] <JOB>

    job cancel stops the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job info

    swcli [GLOBAL OPTIONS] job info [OPTIONS] <JOB>

    job info outputs detailed information about the specified Starwhale Job.

    JOB is a job URI.

    swcli job list

    swcli [GLOBAL OPTIONS] job list [OPTIONS]

    job list shows all Starwhale Jobs.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --show-removed or -srNBooleanFalseIf true, include packages that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli job pause

    swcli [GLOBAL OPTIONS] job pause [OPTIONS] <JOB>

    job pause pauses the specified job. Paused jobs can be resumed by job resume. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    From Starwhale's perspective, pause is almost the same as cancel, except that the job reuses the old Job id when resumed. It is the job developer's responsibility to save all data periodically and load it when resumed. The job id is usually used as a key of the checkpoint.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job resume

    swcli [GLOBAL OPTIONS] job resume [OPTIONS] <JOB>

    job resume resumes the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    - + \ No newline at end of file diff --git a/next/reference/swcli/model/index.html b/next/reference/swcli/model/index.html index dc9c852b1..d7ec5a4c7 100644 --- a/next/reference/swcli/model/index.html +++ b/next/reference/swcli/model/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    swcli model

    Overview

    swcli [GLOBAL OPTIONS] model [OPTIONS] <SUBCOMMAND> [ARGS]...

    The model command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [GLOBAL OPTIONS] model build [OPTIONS] <WORKDIR>

    model build will put the whole WORKDIR into the model, except files that match patterns defined in .swignore.

    model build will import modules specified by --module to generate the required configurations to run the model. If your module depends on third-party libraries, we strongly recommend you use the --runtime option; otherwise, you need to ensure that the python environment used by swcli has these libraries installed.

    OptionRequiredTypeDefaultsDescription
    --project or -pNStringthe default projectthe project URI
    --model-yaml or -fNString${workdir}/model.yamlmodel yaml path, default use ${workdir}/model.yaml file. model.yaml is optional for model build.
    --module or -mNStringPython modules to be imported during the build process. Starwhale will export model handlers from these modules to the model package. This option supports set multiple times.
    --runtime or -rNStringthe URI of the Starwhale Runtime to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --name or -nNStringmodel package name
    --desc or -dNStringmodel package description
    --package-runtime--no-package-runtimeNBooleanTrueWhen using the --runtime parameter, by default, the corresponding Starwhale runtime will become the built-in runtime for the Starwhale model. This feature can be disabled with the --no-package-runtime parameter.
    --add-allNBooleanFalseAdd all files in the working directory to the model package(excludes python cache files and virtual environment files when disabled).The .swignore file still takes effect.
    -t or --tagNGlobalString

    Examples for model build

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [GLOBAL OPTIONS] model copy [OPTIONS] <SRC> <DEST>

    model copy copies from SRC to DEST for Starwhale Model sharing.

    SRC and DEST are both model URIs.

    When copying Starwhale Model, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for model copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [GLOBAL OPTIONS] model diff [OPTIONS] <MODEL VERSION> <MODEL VERSION>

    model diff compares the difference between two versions of the same model.

    MODEL VERSION is a model URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.

    swcli model extract

    swcli [GLOBAL OPTIONS] model extract [OPTIONS] <MODEL> <TARGET_DIR>

    The model extract command can extract a Starwhale model to a specified directory for further customization.

    MODEL is a model URI.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseIf this option is used, it will forcibly overwrite existing extracted model files in the target directory.

    Examples for model extract

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [GLOBAL OPTIONS] model history [OPTIONS] <MODEL>

    model history outputs all history versions of the specified Starwhale Model.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli model info

    swcli [GLOBAL OPTIONS] model info [OPTIONS] <MODEL>

    model info outputs detailed information about the specified Starwhale Model version.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for model info

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [GLOBAL OPTIONS] model list [OPTIONS]

    model list shows all Starwhale Models.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removedNBooleanFalseIf true, include packages that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Models that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of models--filter name=mnist
    ownerKey-ValueThe model owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli model recover

    swcli [GLOBAL OPTIONS] model recover [OPTIONS] <MODEL>

    model recover recovers previously removed Starwhale Models or versions.

    MODEL is a model URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Models or versions can not be recovered, as well as those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Model or version with the same name or version id.

    swcli model remove

    swcli [GLOBAL OPTIONS] model remove [OPTIONS] <MODEL>

    model remove removes the specified Starwhale Model or version.

    MODEL is a model URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Models or versions can be recovered by swcli model recover before garbage collection. Use the --force option to persistently remove a Starwhale Model or version.

    Removed Starwhale Models or versions can be listed by swcli model list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Model or version. It can not be recovered.

    swcli model run

    swcli [GLOBAL OPTIONS] model run [OPTIONS]

    model run executes a model handler. Model run supports two modes to run: model URI and local development. Model URI mode needs a pre-built Starwhale Model Package. Local development mode only needs the model src dir.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringFor local development mode, the path of model src dir.
    --uri or -uNStringFor model URI mode, the string of model uri.
    --handler or -hNStringRunnable handler index or name, default is None, will use the first handler
    --module or -mNStringThe name of the Python module to import. This parameter can be set multiple times.
    --runtime or -rNStringthe Starwhale Runtime URI to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model run.
    --run-project or -pNStringDefault projectProject URI, indicates the model run results will be stored in the corresponding project.
    --dataset or -dNStringDataset URI, the Starwhale dataset required for model running. This parameter can be set multiple times.
    --dataset-head or -dhNInteger0[ONLY STANDALONE]For debugging purpose, every prediction task will, at most, consume the first n rows from every dataset. When the value is less than or equal to 0, all samples will be used.
    --in-containerNBooleanFalseUse docker container to run the model. This option is only available for standalone instances. For server and cloud instances, a docker image is always used. If the runtime is a docker image, this option is always implied.
    --forbid-snapshot or -fsNBooleanFalseIn model URI mode, each model run uses a new snapshot directory. Setting this parameter will directly use the model's workdir as the run directory. In local dev mode, this parameter does not take effect, each run is in the --workdir specified directory.
    -- --user-arbitrary-argsNStringSpecify the args you defined in your handlers.

    Examples for model run

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    # --> run with dataset of head 10
    swcli model run --uri mnist --dataset-head 10 --dataset mnist

    swcli model serve

    swcli [GLOBAL OPTIONS] model serve [OPTIONS]

    The model serve command can run the model as a web server, and provide a simple web interaction interface.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringIn local dev mode, specify the directory of the model code.
    --uri or -uNStringIn model URI mode, specify the model URI.
    --runtime or -rNStringThe URI of the Starwhale runtime to use when running this command. If specified, the command will run in the isolated Python environment defined in the Starwhale runtime. Otherwise it will run directly in the current Python environment of swcli.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model serve.
    --module or -mNStringName of the Python module to import. This parameter can be set multiple times.
    --hostNString127.0.0.1The address for the service to listen on.
    --portNInteger8080The port for the service to listen on.

    Examples for model serve

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [GLOBAL OPTIONS] model tag [OPTIONS] <MODEL> [TAGS]...

    model tag attaches a tag to a specified Starwhale Model version. At the same time, tag command also supports list and remove tags. The tag can be used in a model URI instead of the version id.

    MODEL is a model URI.

    Each model version can have any number of tags, but duplicated tag names are not allowed in the same model.

    model tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding tags to server/cloud instances, if the tag is already used by another model version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for model tag

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/next/reference/swcli/project/index.html b/next/reference/swcli/project/index.html index 82d3962b9..d66031678 100644 --- a/next/reference/swcli/project/index.html +++ b/next/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    swcli project

    Overview

    swcli [GLOBAL OPTIONS] project [OPTIONS] <SUBCOMMAND> [ARGS]...

    The project command includes the following subcommands:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [GLOBAL OPTIONS] project create <PROJECT>

    project create creates a new project.

    PROJECT is a project URI.

    swcli project info

    swcli [GLOBAL OPTIONS] project info [OPTIONS] <PROJECT>

    project info outputs detailed information about the specified Starwhale Project.

    PROJECT is a project URI.

    swcli project list

    swcli [GLOBAL OPTIONS] project list [OPTIONS]

    project list shows all Starwhale Projects.

    OptionRequiredTypeDefaultsDescription
    --instanceNStringThe URI of the instance to list. If this option is omitted, use the default instance.
    --show-removedNBooleanFalseIf true, include projects that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli project recover

    swcli [GLOBAL OPTIONS] project recover [OPTIONS] <PROJECT>

    project recover recovers previously removed Starwhale Projects.

    PROJECT is a project URI.

    Garbage-collected Starwhale Projects can not be recovered, as well as those removed with the --force option.

    swcli project remove

    swcli [GLOBAL OPTIONS] project remove [OPTIONS] <PROJECT>

    project remove removes the specified Starwhale Project.

    PROJECT is a project URI.

    Removed Starwhale Projects can be recovered by swcli project recover before garbage collection. Use the --force option to persistently remove a Starwhale Project.

    Removed Starwhale Project can be listed by swcli project list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Project. It can not be recovered.

    swcli project use

    swcli [GLOBAL OPTIONS] project use <PROJECT>

    project use makes the specified project default. You must login at first to use a project on a Server/Cloud instance.

    - + \ No newline at end of file diff --git a/next/reference/swcli/runtime/index.html b/next/reference/swcli/runtime/index.html index f405ab1db..3057f8004 100644 --- a/next/reference/swcli/runtime/index.html +++ b/next/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    swcli runtime

    Overview

    swcli [GLOBAL OPTIONS] runtime [OPTIONS] <SUBCOMMAND> [ARGS]...

    The runtime command includes the following subcommands:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [GLOBAL OPTIONS] runtime activate [OPTIONS] <RUNTIME>

    Like source venv/bin/activate or conda activate xxx, runtime activate sets up a new python environment according to the settings of the specified runtime. When the current shell is closed or switched to another one, you need to reactivate the runtime. RUNTIME is a Runtime URI.

    If you want to quit the activated runtime environment, please run deactivate in the venv environment or conda deactivate in the conda environment.

    The runtime activate command will build a Python isolated environment and download relevant Python packages according to the definition of the Starwhale runtime when activating the environment for the first time. This process may spend a lot of time.

    swcli runtime build

    swcli [GLOBAL OPTIONS] runtime build [OPTIONS]

    The runtime build command can build a shareable and reproducible runtime environment suitable for ML/DL from various environments or runtime.yaml file.

    Parameters

    • Parameters related to runtime building methods:
    OptionRequiredTypeDefaultsDescription
    -c or --condaNStringFind the corresponding conda environment by conda env name, export Python dependencies to generate Starwhale runtime.
    -cp or --conda-prefixNStringFind the corresponding conda environment by conda env prefix path, export Python dependencies to generate Starwhale runtime.
    -v or --venvNStringFind the corresponding venv environment by venv directory address, export Python dependencies to generate Starwhale runtime.
    -s or --shellNStringExport Python dependencies according to current shell environment to generate Starwhale runtime.
    -y or --yamlNruntime.yaml in cwd directoryBuild Starwhale runtime according to user-defined runtime.yaml.
    -d or --dockerNStringUse the docker image as Starwhale runtime.

    The parameters for runtime building methods are mutually exclusive, only one method can be specified. If not specified, it will use --yaml method to read runtime.yaml in cwd directory to build Starwhale runtime.

    • Other parameters:
    OptionRequiredScopeTypeDefaultsDescription
    --project or -pNGlobalStringDefault projectProject URI
    -del or --disable-env-lockNruntime.yaml modeBooleanFalseWhether to install dependencies in runtime.yaml and lock the version information of related dependencies. The dependencies will be locked by default.
    -nc or --no-cacheNruntime.yaml modeBooleanFalseWhether to delete the isolated environment and install related dependencies from scratch. By default dependencies will be installed in the existing isolated environment.
    --cudaNconda/venv/shell modeChoice[11.3/11.4/11.5/11.6/11.7/]CUDA version, CUDA will not be used by default.
    --cudnnNconda/venv/shell modeChoice[8/]cuDNN version, cuDNN will not be used by default.
    --archNconda/venv/shell modeChoice[amd64/arm64/noarch]noarchArchitecture
    -dpo or --dump-pip-optionsNGlobalBooleanFalseDump pip config options from the ~/.pip/pip.conf file.
    -dcc or --dump-condarcNGlobalBooleanFalseDump conda config from the ~/.condarc file.
    -t or --tagNGlobalStringRuntime tags, the option can be used multiple times.

    Examples for Starwhale Runtime building

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [GLOBAL OPTIONS] runtime copy [OPTIONS] <SRC> <DEST>

    runtime copy copies from SRC to DEST. SRC and DEST are both Runtime URIs.

    When copying Starwhale Runtime, all custom user-defined tags will be copied by default. You can use the --ignore-tag parameter to ignore certain tags. In addition, the latest and ^v\d+$ tags are built-in Starwhale system tags that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for Starwhale Runtime copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [GLOBAL OPTIONS] runtime dockerize [OPTIONS] <RUNTIME>

    runtime dockerize generates a docker image based on the specified runtime. Starwhale uses docker buildx to create the image. Docker 19.03 or later is required to run this command.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --tag or -tNStringThe tag of the docker image. This option can be repeated multiple times.
    --pushNBooleanFalseIf true, push the image to the docker registry
    --platformNStringamd64The target platform, can be either amd64 or arm64. This option can be repeated multiple times to create a multi-platform image.

    swcli runtime extract

    swcli [Global Options] runtime extract [Options] <RUNTIME>

    Starwhale runtimes use the compressed packages to distribute. The runtime extract command can be used to extract the runtime package for further customization and modification.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseWhether to delete and re-extract if there is already an extracted Starwhale runtime in the target directory.
    --target-dirNStringCustom extraction directory. If not specified, it will be extracted to the default Starwhale runtime workdir. The command log will show the directory location.

    swcli runtime history

    swcli [GLOBAL OPTIONS] runtime history [OPTIONS] <RUNTIME>

    runtime history outputs all history versions of the specified Starwhale Runtime.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli runtime info

    swcli [GLOBAL OPTIONS] runtime info [OPTIONS] <RUNTIME>

    runtime info outputs detailed information about a specified Starwhale Runtime version.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/runtime_yaml/manifest/lock/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for Starwhale Runtime info

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [GLOBAL OPTIONS] runtime list [OPTIONS]

    runtime list shows all Starwhale Runtimes.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include runtimes that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Runtimes that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of runtimes--filter name=pytorch
    ownerKey-ValueThe runtime owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli runtime recover

    swcli [GLOBAL OPTIONS] runtime recover [OPTIONS] <RUNTIME>

    runtime recover can recover previously removed Starwhale Runtimes or versions.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Runtimes or versions can not be recovered, as well as those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Runtime or version with the same name or version id.

    swcli runtime remove

    swcli [GLOBAL OPTIONS] runtime remove [OPTIONS] <RUNTIME>

    runtime remove removes the specified Starwhale Runtime or version.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Runtimes or versions can be recovered by swcli runtime recover before garbage collection. Use the --force option to persistently remove a Starwhale Runtime or version.

    Removed Starwhale Runtimes or versions can be listed by swcli runtime list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Runtime or version. It can not be recovered.

    swcli runtime tag

    swcli [GLOBAL OPTIONS] runtime tag [OPTIONS] <RUNTIME> [TAGS]...

    runtime tag attaches a tag to a specified Starwhale Runtime version. At the same time, tag command also supports list and remove tags. The tag can be used in a runtime URI instead of the version id.

    RUNTIME is a Runtime URI.

    Each runtime version can have any number of tags, but duplicated tag names are not allowed in the same runtime.

    runtime tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseRemove the tag if true
    --quiet or -qNBooleanFalseIgnore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding tags to server/cloud instances, if the tag is already used by another runtime version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for runtime tag

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/next/reference/swcli/utilities/index.html b/next/reference/swcli/utilities/index.html index f946cdcfb..ee296ce90 100644 --- a/next/reference/swcli/utilities/index.html +++ b/next/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Utility Commands

    swcli gc

    swcli [GLOBAL OPTIONS] gc [OPTIONS]

    gc clears removed projects, models, datasets, and runtimes according to the internal garbage collection policy.

    OptionRequiredTypeDefaultsDescription
    --dry-runNBooleanFalseIf true, outputs objects to be removed instead of clearing them.
    --yesNBooleanFalseBypass confirmation prompts.

    swcli check

    swcli [GLOBAL OPTIONS] check

    Check if the external dependencies of the swcli command meet the requirements. Currently mainly checks Docker and Conda.

    swcli completion install

    swcli [GLOBAL OPTIONS] completion install <SHELL_NAME>

    Install autocompletion for swcli commands. Currently supports bash, zsh and fish. If SHELL_NAME is not specified, it will try to automatically detect the current shell type.

    swcli config edit

    swcli [GLOBAL OPTIONS] config edit

    Edit the Starwhale configuration file at ~/.config/starwhale/config.yaml.

    swcli ui

    swcli [GLOBAL OPTIONS] ui <INSTANCE>

    Open the web page for the corresponding instance.

    - + \ No newline at end of file diff --git a/next/runtime/index.html b/next/runtime/index.html index f3afa6ae8..b9e860e5b 100644 --- a/next/runtime/index.html +++ b/next/runtime/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Runtime

    overview

    Overview

    Starwhale Runtime aims to provide a reproducible and sharable running environment for python programs. You can easily share your working environment with your teammates or outsiders, and vice versa. Furthermore, you can run your programs on Starwhale Server or Starwhale Cloud without bothering with the dependencies.

    Starwhale works well with virtualenv, conda, and docker. If you are using one of them, it is straightforward to create a Starwhale Runtime based on your current environment.

    Multiple Starwhale Runtimes on your local machine can be switched freely by one command. You can work on different projects without messing up the environment. Starwhale Runtime consists of two parts: the base image and the dependencies.

    The base image

    The base is a docker image with Python, CUDA, and cuDNN installed. Starwhale provides various base images for you to choose from; see the following list:

    • Computer system architecture:
      • X86 (amd64)
      • Arm (aarch64)
    • Operating system:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7
    - + \ No newline at end of file diff --git a/next/runtime/yaml/index.html b/next/runtime/yaml/index.html index 8f8a1640b..db7e6e36c 100644 --- a/next/runtime/yaml/index.html +++ b/next/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    The runtime.yaml Specification

    runtime.yaml is the configuration file that defines the properties of the Starwhale Runtime. runtime.yaml is required for the yaml mode of the swcli runtime build command.

    Examples

    The simplest example

    dependencies:
    - pip:
    - numpy
    name: simple-test

    Define a Starwhale Runtime that uses venv as the Python virtual environment for package isolation, and installs the numpy dependency.

    The llama2 example

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    The full definition example

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yaml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be installed with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/next/server/guides/server_admin/index.html b/next/server/guides/server_admin/index.html index 9b6348bf1..75148c9db 100644 --- a/next/server/guides/server_admin/index.html +++ b/next/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    Version: WIP

    Controller Admin Settings

    Superuser Password Reset

    In case you forget the superuser's password, you can use the SQL statement below to reset the password to abcd1234:

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    After that, you could login to the console and then change the password to what you really want.

    System Settings

    You can customize the system to make it easier to use by leveraging the System Settings. Here is an example below:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    Image Registry

    Tasks dispatched by the server are based on docker images. Pulling these images could be slow if your internet is not working well. Starwhale Server supports custom image registries, including dockerSetting.registryForPush and dockerSetting.registryForPull.

    Resource Pool

    The resourcePoolSetting allows you to manage your cluster in a group manner. It is currently implemented by K8S nodeSelector, you could label your machines in K8S cluster and make them a resourcePool in Starwhale.

    Remote Storage

    The storageSetting allows you to manage the storages the server could access.

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload

    Every storageSetting item has a corresponding implementation of the StorageAccessService interface. Starwhale has four built-in implementations:

    • StorageAccessServiceAliyun matches type in (aliyun,oss)
    • StorageAccessServiceMinio matches type in (minio)
    • StorageAccessServiceS3 matches type in (s3)
    • StorageAccessServiceFile matches type in (fs, file)

    Each of the implementations has different requirements for tokens. endpoint is required when type is in (aliyun,minio); region is required when type is s3 and endpoint is empty. The fs/file type requires tokens entries named rootDir and serviceProvider. Please refer to the code for more details.

    - + \ No newline at end of file diff --git a/next/server/index.html b/next/server/index.html index ce621d3a9..7f25ab1d8 100644 --- a/next/server/index.html +++ b/next/server/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/next/server/installation/docker-compose/index.html b/next/server/installation/docker-compose/index.html index c3e194388..15bc9dbc8 100644 --- a/next/server/installation/docker-compose/index.html +++ b/next/server/installation/docker-compose/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Install Starwhale Server with Docker Compose

    Prerequisites

    Usage

    Start up the server

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    GLOBAL_IP is the IP address of the Controller, which must be accessible to swcli clients both inside docker containers and on other user machines.

    compose.yaml contains the Starwhale Controller/MySQL/MinIO services. Creating a compose.override.yaml file, as its name implies, lets you override configurations in compose.yaml. The available configurations are specified here.

    - + \ No newline at end of file diff --git a/next/server/installation/docker/index.html b/next/server/installation/docker/index.html index ace7fb491..cdd5cf616 100644 --- a/next/server/installation/docker/index.html +++ b/next/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Install Starwhale Server with Docker

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage to save datasets, models, and others.

    Please make sure pods on the Kubernetes cluster can access the port exposed by the Starwhale Server installation.

    Prepare an env file for Docker

    Starwhale Server can be configured by environment variables.

    An env file template for Docker is here. You may create your own env file by modifying the template.

    Prepare a kubeconfig file [Optional][SW_SCHEDULER=k8s]

    The kubeconfig file is used for accessing the Kubernetes cluster. For more information about kubeconfig files, see the Official Kubernetes Documentation.

    If you have a local kubectl command-line tool installed, you can run kubectl config view to see your current configuration.

    Run the Docker image

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<path to your kubeconfig file>,destination=/root/.kube/config,readonly \
    --env-file <path to your env file> \
    ghcr.io/star-whale/server:0.5.6

    For users in the mainland of China, use docker image: docker-registry.starwhale.cn/star-whale/server.

    - + \ No newline at end of file diff --git a/next/server/installation/index.html b/next/server/installation/index.html index 54b971194..710ab6658 100644 --- a/next/server/installation/index.html +++ b/next/server/installation/index.html @@ -10,13 +10,13 @@ - +
    - + \ No newline at end of file diff --git a/next/server/installation/k8s-cluster/index.html b/next/server/installation/k8s-cluster/index.html index d67a45a22..32e9d8e2a 100644 --- a/next/server/installation/k8s-cluster/index.html +++ b/next/server/installation/k8s-cluster/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Install Starwhale Server to Kubernetes Cluster

    In a private deployment scenario, Starwhale Server can be deployed to a Kubernetes cluster using Helm. Starwhale Server relies on two fundamental infrastructure dependencies: MySQL database and object storage.

    • For production environments, it is recommended to provide externally high-availability MySQL database and object storage.
    • For trial or testing environments, the standalone versions of MySQL and MinIO, included in the Starwhale Charts, can be utilized.

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • Kubernetes Ingress provides HTTP(S) routing.
    • Helm 3.2.0+.
    • [Production Required] A running MySQL 8.0+ instance to store metadata.
    • [Production Required] A S3-compatible object storage system to save datasets, models, and others. Currently tested compatible object storage services:

    Helm Charts

    Downloading Helm Charts

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    Editing values.yaml (production required)

    In a production environment, it is recommended to configure parameters like the MySQL database, object storage, domain names, and memory allocation by editing values.yaml based on actual deployment needs. Below is a sample values.yaml for reference:

    # Set image registry for China mainland, recommend "docker-registry.starwhale.cn". Other network environments can ignore this setting, will use ghcr.io: https://github.com/orgs/star-whale/packages.
    image:
    registry: docker-registry.starwhale.cn
    org: star-whale

    # External MySQL service depended in production, MySQL version needs to be greater than 8.0
    externalMySQL:
    host: 10.0.1.100 # Database IP address or domain that is accessible within the Kubernetes cluster
    port: 3306
    username: "your-username"
    password: "your-password"
    database: starwhale # Needs to pre-create the database, name can be specified freely, default charset is fine. The database user specified above needs read/write permissions to this database

    # External S3 protocol compatible object storage service relied on in production
    externalOSS:
    host: ks3-cn-beijing.ksyuncs.com # Object storage IP address or domain that is accessible from both the Kubernetes cluster and Standalone instances
    port: 80
    accessKey: "your-ak"
    secretKey: "your-sk"
    defaultBuckets: test-gp # Needs to pre-create the Bucket, name can be specified freely. The ak/sk specified above needs read/write permissions to this Bucket
    region: BEIJING # Object storage corresponding region, defaults to local

    # If external object storage is specified in production, built-in single instance MinIO is not needed
    minio:
    enabled: false

    # If external MySQL is specified in production, built-in single instance MySQL is not needed
    mysql:
    enabled: false

    controller:
    containerPort: 8082
    storageType: "ksyun" # Type of object storage service minio/s3/ksyun/baidu/tencent/aliyun

    ingress:
    enabled: true
    ingressClassName: nginx # Corresponds to the Ingress Controller in the Kubernetes cluster
    host: server-domain-name # External accessible domain name for the Server
    path: /

    # Recommend at least 32GB memory and 8 CPU cores for Starwhale Server in production
    resources:
    controller:
    limits:
    memory: 32G
    cpu: 8
    requests:
    memory: 32G
    cpu: 8

    # Downloading Python Packages defined in Starwhale Runtime requires setting PyPI mirror corresponding to actual network environment. Can also modify later in Server System Settings page.
    mirror:
    pypi:
    enabled: true
    indexUrl: "https://mirrors.aliyun.com/pypi/simple/"
    extraIndexUrl: "https://pypi.tuna.tsinghua.edu.cn/simple/"
    trustedHost: "mirrors.aliyun.com pypi.tuna.tsinghua.edu.cn"

    Deploying/Upgrading Starwhale Server

    The following command can be used for both initial deployment and upgrades. It will automatically create a Kubernetes namespace called "starwhale". values.custom.yaml is the values.yaml file written according to the actual needs of the cluster.

    helm upgrade --devel --install starwhale starwhale/starwhale --namespace starwhale --create-namespace -f values.custom.yaml

    If you have a local kubectl command-line tool installed, you can run kubectl get pods -n starwhale to check if all pods are running.

    Uninstalling Starwhale Server

    helm delete starwhale --namespace starwhale
    - + \ No newline at end of file diff --git a/next/server/installation/minikube/index.html b/next/server/installation/minikube/index.html index 2fffcbcdf..d3e5e23c5 100644 --- a/next/server/installation/minikube/index.html +++ b/next/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Install Starwhale Server with Minikube

    Prerequisites

    Starting Minikube

    minikube start --addons ingress

    For users in the mainland of China, please run the following commands:

    minikube start --kubernetes-version=1.25.3 --image-repository=docker-registry.starwhale.cn/minikube --base-image=docker-registry.starwhale.cn/minikube/k8s-minikube/kicbase:v0.0.42

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,IngressController=ingress-nginx/controller:v1.9.4"

    The docker registry docker-registry.starwhale.cn/minikube currently only caches the images for Kubernetes 1.25.3. Alternatively, you can use the Aliyun mirror:

    minikube start --image-mirror-country=cn

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=kube-webhook-certgen:v20231011-8b53cabe0,IngressController=nginx-ingress-controller:v1.9.4" --registries="KubeWebhookCertgenPatch=registry.cn-hangzhou.aliyuncs.com/google_containers,KubeWebhookCertgenCreate=registry.cn-hangzhou.aliyuncs.com/google_containers,IngressController=registry.cn-hangzhou.aliyuncs.com/google_containers"

    If there is no kubectl binary on your machine, you may use minikube kubectl or create an alias with alias kubectl="minikube kubectl --".

    Installing Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    For users in the mainland of China, use values.minikube.cn.yaml:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    After the installation is successful, the following prompt message appears:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    Checking Starwhale Server status

    Keep checking the minikube service status until all deployments are running (it takes about 3~5 mins):

    kubectl get deployments -n starwhale
    NAME         READY   UP-TO-DATE   AVAILABLE   AGE
    controller   1/1     1            1           5m
    minio        1/1     1            1           5m
    mysql        1/1     1            1           5m

    Visiting for local

    Make the Starwhale controller accessible locally with the following command:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    Then you can visit http://controller.starwhale.svc in your local web browser.

    Visiting for others

    • Step 1: in the Starwhale Server machine

      for temporary use with socat command:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      When you kill the socat process, the share access will be blocked. iptables maybe a better choice for long-term use.

    • Step 2: in the other machines

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/next/server/installation/starwhale_env/index.html b/next/server/installation/starwhale_env/index.html index 61233e350..94cd9d7a0 100644 --- a/next/server/installation/starwhale_env/index.html +++ b/next/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Server Environment Example

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################

    # The cache directory for the WAL files. Point it to a mounted volume or host path with enough space.
    # If not set, the WAL files will be saved in the docker runtime layer, and will be lost when the container is restarted.
    SW_DATASTORE_WAL_LOCAL_CACHE_DIR=
    - + \ No newline at end of file diff --git a/next/server/project/index.html b/next/server/project/index.html index 91f2e6306..4d74fd507 100644 --- a/next/server/project/index.html +++ b/next/server/project/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    How to Organize and Manage Resources with Starwhale Projects

    Project is the basic unit for organizing and managing resources (such as models, datasets, runtime environments, etc.). You can create and manage projects based on your needs. For example, you can create projects by business team, product line, or models. One user can create and participate in one or more projects.

    Project type

    There are two types of projects:

    • Private project: The project (and related resources in the project) is only visible to project members with permission. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    • Public project: The project (and related resources in the project) is visible to all Starwhale users. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    Create a project

    1. Click the Create button in the upper right corner of the project list page;
    2. Enter a name for the project. Pay attention to avoiding duplicate names. For more information, please see Names in Starwhale
    3. Select the Project Type, which is defaulted to private project and can be selected as public according to needs;
    4. Fill in the description content;
    5. To finish, Click the Submit button.

    Edit a project

    The name, privacy and description of a project can be edited.

    1. Go to the project list page and find the project that needs to be edited by searching for the project name, then click the Edit Project button;
    2. Edit the items that need to be edited;
    3. Click Submit to save the edited content;
    4. If you're editing multiple projects, repeat steps 1 through 3.

    View a project

    My projects

    On the project list page, only my projects are displayed by default. My projects refer to the projects that the current user participates in as a project member or project owner.

    Project sorting

    On the project list page, all projects are supported to be sorted by "Recently visited", "Project creation time from new to old", and "Project creation time from old to new", which can be selected according to your needs.

    Delete a project

    Once a project is deleted, all related resources (such as datasets, models, runtimes, evaluations, etc.) will be deleted and cannot be restored.

    1. Enter the project list page and search for the project name to find the project that needs to be deleted. Hover your mouse over the project you want to delete, then click the Delete button;
    2. Follow the prompts, enter the relevant information, click Confirm to delete the project, or click Cancel to cancel the deletion;
    3. If you are deleting multiple projects, repeat the above steps.

    Manage project member

    Only users with the admin role can assign members to the project. The project creator has the project owner role by default.

    Add a member

    1. Click Manage Members to go to the project member list page;
    2. Click the Add Member button in the upper right corner.
    3. Enter the Username you want to add, select a project role for the user in the project.
    4. Click submit to complete.
    5. If you're adding multiple members, repeat steps 1 through 4.

    Remove a member

    1. On the project list page or project overview tab, click Manage Members to go to the project member list page.
    2. Search for the username you want to delete, then click the Delete button.
    3. Click Yes to delete the user from this project, click No to cancel the deletion.
    4. If you're removing multiple members, repeat steps 1 through 3.

    Edit a member's role

    1. Hover your mouse over the project you want to edit, then click Manage Members to go to the project member list page.
    2. Find the username you want to adjust through searching, click the Project Role drop-down menu, and select a new project role. For more information on roles, please take a look at Roles and permissions in Starwhale.
    - + \ No newline at end of file diff --git a/next/swcli/config/index.html b/next/swcli/config/index.html index 4a6fd3496..ac118e213 100644 --- a/next/swcli/config/index.html +++ b/next/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Configuration

    Standalone Instance is installed on the user's laptop or development server, providing isolation at the level of Linux/macOS users. Users can install the Starwhale Python package using the pip command and execute any swcli command. After that, they can view their Starwhale configuration in ~/.config/starwhale/config.yaml. In the vast majority of cases, users do not need to manually modify the config.yaml file.

    The ~/.config/starwhale/config.yaml file has permissions set to 0o600 to ensure security, as it contains sensitive information such as encryption keys. Users are advised not to change the file permissions. You can customize your swcli with swcli config edit:

    swcli config edit

    config.yaml example

    The typical config.yaml file is as follows:

    • The default instance is local.
    • cloud-cn/cloud-k8s/pre-k8s are the server/cloud instances, local is the standalone instance.
    • The local storage root directory for the Standalone Instance is set to /home/liutianwei/.starwhale.
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml definition

    ParameterDescriptionTypeDefault ValueRequired
    current_instanceThe name of the default instance to use. It is usually set using the swcli instance select command.StringselfYes
    instancesManaged instances, including Standalone, Server and Cloud Instances. There must be at least one Standalone Instance named "local" and one or more Server/Cloud Instances. You can log in to a new instance with swcli instance login and log out from an instance with swcli instance logout.DictStandalone Instance named "local"Yes
    instances.{instance-alias-name}.sw_tokenLogin token for Server/Cloud Instances. It is only effective for Server/Cloud Instances. Subsequent swcli operations on Server/Cloud Instances will use this token. Note that tokens have an expiration time, typically set to one month, which can be configured within the Server/Cloud Instance.StringCloud - Yes, Standalone - No
    instances.{instance-alias-name}.typeType of the instance, currently can only be "cloud" or "standalone".Choice[string]Yes
    instances.{instance-alias-name}.uriFor Server/Cloud Instances, the URI is an http/https address. For Standalone Instances, the URI is set to "local".StringYes
    instances.{instance-alias-name}.user_nameUser's nameStringYes
    instances.{instance-alias-name}.current_projectDefault Project under the current instance. It will be used to fill the "project" field in the URI representation by default. You can set it using the swcli project select command.StringYes
    instances.{instance-alias-name}.user_roleUser's role.StringnormalYes
    instances.{instance-alias-name}.updated_atThe last updated time for this instance configuration.Time format stringYes
    storageSettings related to local storage.DictYes
    storage.rootThe root directory for Standalone Instance's local storage. Typically, if there is insufficient space in the home directory and you manually move data files to another location, you can modify this field.String~/.starwhaleYes
    versionThe version of config.yaml, currently only supports 2.0.String2.0Yes

    You could put starwhale.Link to your assets while the URI in the Link could be whatever (only s3-like or http is implemented) you need, such as s3://10.131.0.1:9000/users/path. However, Links may need to be authed; you could config the auth info in link_auths.

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    Items in link_auths will match the uri in Links automatically. An s3-typed link_auth matches Links by looking up bucket and endpoint.

    - + \ No newline at end of file diff --git a/next/swcli/index.html b/next/swcli/index.html index 3aee180bb..acee73ccf 100644 --- a/next/swcli/index.html +++ b/next/swcli/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Client (swcli) User Guide

    The Starwhale Client (swcli) is a command-line tool that enables you to interact with Starwhale instances. You can use swcli to complete almost all tasks in Starwhale. swcli is written in pure python3 (requires Python 3.7 ~ 3.11) so that it can be easily installed by the pip command. Currently, swcli only supports Linux and macOS; Windows support is coming soon.

    - + \ No newline at end of file diff --git a/next/swcli/installation/index.html b/next/swcli/installation/index.html index 7f37d5655..21965ac5a 100644 --- a/next/swcli/installation/index.html +++ b/next/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Installation Guide

    We can use swcli to complete all tasks for Starwhale Instances. swcli is written in pure python3, which can be installed easily by the pip command. Here are some installation tips that can help you get a cleaner, unambiguous, dependency-conflict-free swcli python environment.

    Installing Advice

    DO NOT install Starwhale in your system's global Python environment. It will cause a python dependency conflict problem.

    Prerequisites

    • Python 3.7 ~ 3.11
    • Linux or macOS
    • Conda (optional)

    In the Ubuntu system, you can run the following commands:

    sudo apt-get install python3 python3-venv python3-pip

    #If you want to install multi python versions
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli works on macOS. If you run into issues with the default system Python3 on macOS, try installing Python3 through the homebrew:

    brew install python3

    Install swcli

    Install with venv

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    Install with conda

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    👏 Now, you can use swcli in the global environment.

    Install for the special scenarios

    # for Audio processing
    python -m pip install starwhale[audio]

    # for Image processing
    python -m pip install starwhale[pillow]

    # for swcli model server command
    python -m pip install starwhale[server]

    # for built-in online serving
    python -m pip install starwhale[online-serve]

    # install all dependencies
    python -m pip install starwhale[all]

    Update swcli

    #for venv
    python3 -m pip install --upgrade starwhale

    #for conda
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    Uninstall swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/next/swcli/swignore/index.html b/next/swcli/swignore/index.html index 91f88b91c..21aa9f078 100644 --- a/next/swcli/swignore/index.html +++ b/next/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    About the .swignore file

    The .swignore file is similar to .gitignore, .dockerignore, and other files used to define ignored files or dirs. The .swignore file is mainly used in the Starwhale Model building process. By default, the swcli model build command or starwhale.model.build() Python SDK will traverse all files in the specified directory and automatically exclude certain known files or directories that are not suitable for inclusion in the model package.

    PATTERN FORMAT

    • Each line in a swignore file specifies a pattern, which matches files and directories.
    • A blank line matches no files, so it can serve as a separator for readability.
    • An asterisk * matches anything except a slash.
    • A line starting with # serves as a comment.
    • Supports wildcard expressions, for example: *.jpg, *.png.

    Auto Ignored files or dirs

    If you want to include the auto ignored files or dirs, you can add --add-all for the swcli model build command.

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv installation dir
    • conda installation dir

    Example

    Here is the .swignore file used in the MNIST example:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/next/swcli/uri/index.html b/next/swcli/uri/index.html index 7c8f0c77f..2b6404673 100644 --- a/next/swcli/uri/index.html +++ b/next/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    Version: WIP

    Starwhale Resources URI

    tip

    Resource URI is widely used in Starwhale client commands. The URI can refer to a resource in the local instance or any other resource in a remote instance. In this way, the Starwhale client can easily manipulate any resource.

    concepts-org.jpg

    Instance URI

    Instance URI can be either:

    • local: standalone instance.
    • [http(s)://]<hostname or ip>[:<port>]: cloud instance with HTTP address.
    • [cloud://]<cloud alias>: cloud or server instance with an alias name, which can be configured in the instance login phase.
    caution

    "local" is different from "localhost". The former means the local standalone instance without a controller, while the latter implies a controller listening at the default port 8082 on the localhost.

    Example:

    # log in Starwhale Cloud; the alias is swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.ai --alias swcloud

    # copy a model from the local instance to the cloud instance
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # copy a runtime to a Starwhale Server instance: http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    Project URI

    Project URI is in the format [<Instance URI>/project/]<project name>. If the instance URI is not specified, use the current instance instead.

    Example:

    swcli project select self   # select the self project in the current instance
    swcli project info local/project/self # inspect self project info in the local instance

    Model/Dataset/Runtime URI

    • Model URI: [<Project URI>/model/]<model name>[/version/<version id|tag>].
    • Dataset URI: [<Project URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • Runtime URI: [<Project URI>/runtime/]<runtime name>[/version/<version id|tag>].
    tip
    • swcli supports human-friendly short version id. You can type the first few characters of the version id, provided it is at least four characters long and unambiguous. However, the recover command must use the complete version id.
    • If the project URI is not specified, the default project will be used.
    • You can always use the version tag instead of the version id.

    Example:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # inspect model info, model name: mnist, version:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # short version
    swcli model info mnist # inspect mnist model info
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # use the default latest tag

    Job URI

    • format: [<Project URI>/job/]<job id>.
    • If the project URI is not specified, the default project will be used.

    Example:

    swcli job info mezdayjzge3w   # Inspect mezdayjzge3w version in default instance and default project
    swcli job info local/project/self/job/mezday # Inspect the local instance, self project, with short job id:mezday

    The default instance

    When the instance part of a project URI is omitted, the default instance is used instead. The default instance is the one selected by the swcli instance login or swcli instance use command.

    The default project

    When the project parts of Model/Dataset/Runtime/Evaluation URIs are omitted, the default project is used instead. The default project is the one selected by the swcli project use command.

    - + \ No newline at end of file diff --git a/reference/sdk/dataset/index.html b/reference/sdk/dataset/index.html index fe88d910c..8d7987467 100644 --- a/reference/sdk/dataset/index.html +++ b/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Dataset SDK

    dataset

    Get starwhale.Dataset object, by creating new datasets or loading existing datasets.

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    Parameters

    • uri: (str or Resource, required)
      • The dataset uri or Resource object.
    • create: (str, optional)
      • The mode of dataset creating. The options are auto, empty and forbid.
        • auto mode: If the dataset already exists, creation is ignored. If it does not exist, the dataset is created automatically.
        • empty mode: If the dataset already exists, an Exception is raised; If it does not exist, an empty dataset is created. This mode ensures the creation of a new, empty dataset.
        • forbid mode: If the dataset already exists, nothing is done. If it does not exist, an Exception is raised. This mode ensures the existence of the dataset.
      • The default is auto.
    • readonly: (bool, optional)
      • For an existing dataset, you can specify the readonly=True argument to ensure the dataset is in readonly mode.
      • Default is False.

    Examples

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # return False, "mnist" dataset does not exist.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset implements the abstraction of a Starwhale dataset, and can operate on datasets in Standalone/Server/Cloud instances.

    from_huggingface

    from_huggingface is a classmethod that can convert a Huggingface dataset into a Starwhale dataset.

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • dataset name.
    • repo: (str, required)
    • subset: (str, optional)
      • The subset name. If the huggingface dataset has multiple subsets, you must specify the subset name.
    • split: (str, optional)
      • The split name. If the split name is not specified, all splits of the dataset will be built.
    • revision: (str, optional)
      • The huggingface datasets revision. The default value is main.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • cache: (bool, optional)
      • Whether to use huggingface dataset cache(download + local hf dataset).
      • The default value is True.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json is a classmethod that can convert a json text into a Starwhale dataset.

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • name: (str, required)
      • Dataset name.
    • json_text: (str, required)
      • A json string. The from_json function deserializes this string into Python objects to start building the Starwhale dataset.
    • field_selector: (str, optional)
      • The field from which you would like to extract dataset array items.
      • The default value is "" which indicates that the json object is an array containing all the items.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder is a classmethod that can read Image/Video/Audio data from a specified directory and automatically convert them into a Starwhale dataset. This function supports the following features:

    • It can recursively search the target directory and its subdirectories
    • Supports extracting three types of files:
      • image: Supports png/jpg/jpeg/webp/svg/apng image types. Image files will be converted to Starwhale.Image type.
      • video: Supports mp4/webm/avi video types. Video files will be converted to Starwhale.Video type.
      • audio: Supports mp3/wav audio types. Audio files will be converted to Starwhale.Audio type.
    • Each file corresponds to one record in the dataset, with the file stored in the file field.
    • If auto_label=True, the parent directory name will be used as the label for that record, stored in the label field. Files in the root directory will not be labeled.
    • If a txt file with the same name as an image/video/audio file exists, its content will be stored as the caption field in the dataset.
    • If metadata.csv or metadata.jsonl exists in the root directory, their content will be read automatically and associated with records by file path as meta information in the dataset.
      • metadata.csv and metadata.jsonl are mutually exclusive. An exception will be thrown if both exist.
      • Each record in metadata.csv and metadata.jsonl must contain a file_name field pointing to the file path.
      • metadata.csv and metadata.jsonl are optional for dataset building.
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    Parameters

    • folder: (str|Path, required)
      • The folder path from which you would like to create this dataset.
    • kind: (str|DatasetFolderSourceType, required)
      • The dataset source type you would like to use, the choices are: image, video and audio.
      • Recursively searching for files of the specified kind in folder. Other file types will be ignored.
    • name: (str|Resource, optional)
      • The dataset name you would like to use.
      • If not specified, the name is the folder name.
    • auto_label: (bool, optional)
      • Whether to auto label by the sub-folder name.
      • The default value is True.
    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.
    • mode: (str|DatasetChangeMode, optional)
      • The dataset change mode. The default value is patch. Mode choices are patch and overwrite.
    • tags: (List[str], optional)
      • The user custom tags of the dataset.

    Examples ${folder-example}

    • Example for the normal function calling

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • Example for caption

      folder/dog/1.png
      folder/dog/1.txt

      1.txt content will be used as the caption of 1.png.

    • Example for metadata

      metadata.csv:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • Example for auto-labeling

      The following structure will create a dataset with 2 labels: "cat" and "dog", 4 images in total.

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

    __iter__

    __iter__ a method that iter the dataset rows.

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label and img are the features of mnist.
    print(item.features.img)

    batch_iter

    batch_iter is a method that iter the dataset rows in batch.

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    Parameters

    • batch_size: (int, optional)
      • batch size. The default value is 1.
    • drop_not_full: (bool, optional)
      • Whether to discard the last batch of data when its size is smaller than batch_size.
      • The default value is False.

    Examples

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ is a method that allows retrieving certain rows of data from the dataset, with usage similar to Python dict and list types.

    from starwhale import dataset

    ds = dataset("mock-int-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-str-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ is a method that allows updating rows of data in the dataset, with usage similar to Python dicts. __setitem__ supports multi-threaded parallel data insertion.

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    Parameters

    • key: (int|str, required)
      • key is the index for each row in the dataset. The type is int or str, but a dataset only accepts one type.
    • value: (DataRow|tuple|dict, required)
      • value is the features for each row in the dataset, using a Python dict is generally recommended.

    Examples

    • Normal insertion

    Insert two rows into the test dataset, with index test and test2 respectively:

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • Parallel insertion
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ is a method to delete certain rows of data from the dataset.

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append is a method to append data to a dataset, similar to the append method for Python lists.

    • Adding features dict, each row is automatically indexed with int starting from 0 and incrementing.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • By appending the index and features dictionary, the index of each data row in the dataset will not be handled automatically.

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend is a method to bulk append data to a dataset, similar to the extend method for Python lists.

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit is a method that flushes the current cached data to storage when called, and generates a dataset version. This version can then be used to load the corresponding dataset content afterwards.

    For a dataset, if some data is added without calling commit, but close is called or the process exits directly instead, the data will still be written to the dataset, just without generating a new version.

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    Parameters

    • tags: (list(str), optional)
      • tag as a list
    • message: (str, optional)
      • commit message. The default value is empty.
    • force_add_tags: (bool, optional)
      • For server/cloud instances, when adding labels to this version, if a label has already been applied to other dataset versions, you can use the force_add_tags=True parameter to forcibly add the label to this version, otherwise an exception will be thrown.
      • The default is False.
    • ignore_add_tags_errors: (bool, optional)
      • Ignore any exceptions thrown when adding labels.
      • The default is False.

    Examples

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly is a property attribute indicating if the dataset is read-only, it returns a bool value.

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version is a property attribute, string type.

    • When loading an existing dataset, the loading_version is the related dataset version.
    • When creating a non-existed dataset, the loading_version is equal to the pending_commit_version.

    pending_commit_version

    pending_commit_version is a property attribute, string type. When you call the commit function, the pending_commit_version will be recorded in the Standalone instance, Server instance, or Cloud instance.

    committed_version

    committed_version is a property attribute, string type. After the commit function is called, the committed_version will come out, it is equal to the pending_commit_version. Accessing this attribute without calling commit first will raise an exception.

    remove

    remove is a method equivalent to the swcli dataset remove command, it can delete a dataset.

    def remove(self, force: bool = False) -> None:

    recover

    recover is a method equivalent to the swcli dataset recover command, it can recover a soft-deleted dataset that has not been run garbage collection.

    def recover(self, force: bool = False) -> None:

    summary

    summary is a method equivalent to the swcli dataset summary command, it returns summary information of the dataset.

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history is a method equivalent to the swcli dataset history command, it returns the history records of the dataset.

    def history(self) -> t.List[t.Dict]:

    flush

    flush is a method that flushes temporarily cached data from memory to persistent storage. The commit and close methods will automatically call flush.

    close

    close is a method that closes opened connections related to the dataset. Dataset also implements contextmanager, so datasets can be automatically closed using with syntax without needing to explicitly call close.

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head is a method to show the first n rows of a dataset, equivalent to the swcli dataset head command.

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> List[DataRow]:

    fetch_one

    fetch_one is a method to get the first record in a dataset, similar to head(n=1)[0].

    list

    list is a class method to list Starwhale datasets under a project URI, equivalent to the swcli dataset list command.

    @classmethod
    def list(
    cls,
    project_uri: Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[DatasetListType, Dict[str, Any]]:

    copy

    copy is a method to copy a dataset to another instance, equivalent to the swcli dataset copy command.

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • When copying a remote dataset to the local instance, this parameter can be set to specify the destination Project URI.
    • force: (bool, optional)
      • Whether to forcibly overwrite the dataset if there is already one with the same version on the target instance.
      • The default value is False.
      • When the tags are already used for the other dataset version in the dest instance, you should use force option or adjust the tags.
    • mode: (str, optional)
      • Dataset copy mode, default is 'patch'. Mode choices are: 'patch', 'overwrite'.
      • patch: Patch mode, only update the changed rows and columns for the remote dataset.
      • overwrite: Overwrite mode, update records and delete extraneous rows from the remote dataset.
    • ignore_tags (List[str], optional)
      • Ignore tags when copying.
      • By default, the dataset is copied with all user custom tags.
      • latest and ^v\d+$ are the system builtin tags, they are ignored automatically.

    Examples

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch is a method that can convert a Starwhale dataset to a Pytorch torch.utils.data.Dataset, which can then be passed to torch.utils.data.DataLoader for use.

    It should be noted that the to_pytorch function returns a Pytorch IterableDataset.

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    Parameters

    • transform: (callable, optional)
      • A transform function for input data.
    • drop_index: (bool, optional)
      • Whether to drop the index column.
    • skip_default_transform: (bool, optional)
      • If transform is not set, by default the built-in Starwhale transform function will be used to transform the data. This can be disabled with the skip_default_transform parameter.

    Examples

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow is a method that can convert a Starwhale dataset to a Tensorflow tensorflow.data.Dataset.

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    Parameters

    • drop_index: (bool, optional)
      • Whether to drop the index column.

    Examples

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config is a method to set blob-related attributes in a Starwhale dataset. It needs to be called before making data changes.

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    Parameters

    • alignment_size: (int|str, optional)
      • The blob alignment size.
      • The default value is 128 Bytes.
    • volume_size: (int|str, optional)
      • The maximum size of a dataset blob file. A new blob file will be generated when the size exceeds this limit.
      • The default value is 64MB.

    Examples

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config is a method to set parameters for the Starwhale dataset loader process.

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    Parameters

    • num_workers: (int, optional)
      • The workers number for loading dataset.
      • The default value is 2.
    • cache_size: (int, optional)
      • Prefetched data rows.
      • The default value is 20.
    • field_transformer: (dict, optional)
      • features name transform dict.

    Examples

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/reference/sdk/evaluation/index.html b/reference/sdk/evaluation/index.html index b441e2b16..fbfeae849 100644 --- a/reference/sdk/evaluation/index.html +++ b/reference/sdk/evaluation/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Model Evaluation SDK

    @evaluation.predict

    The @evaluation.predict decorator defines the inference process in the Starwhale Model Evaluation, similar to the map phase in MapReduce. It contains the following core features:

    • On the Server instance, require the resources needed to run.
    • Automatically read the local or remote datasets, and pass the data in the datasets one by one or in batches to the function decorated by evaluation.predict.
    • By the replicas setting, implement distributed dataset consumption to horizontally scale and shorten the time required for the model evaluation tasks.
    • Automatically store the return values of the function and the input features of the dataset into the results table, for display in the Web UI and further use in the evaluate phase.
    • The decorated function is called once for each single piece of data or each batch, to complete the inference process.

    Parameters

    • resources: (dict, optional)
      • Defines the resources required by each predict task when running on the Server instance, including memory, cpu, and nvidia.com/gpu.
      • memory: The unit is Bytes, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"memory": {"request": 100 * 1024, "limit": 200 * 1024}}.
        • If only a single number is set, the Python SDK will automatically set request and limit to the same value, e.g. resources={"memory": 100 * 1024} is equivalent to resources={"memory": {"request": 100 * 1024, "limit": 100 * 1024}}.
      • cpu: The unit is the number of CPU cores, int and float types are supported.
        • Supports setting request and limit as a dictionary, e.g. resources={"cpu": {"request": 1, "limit": 2}}.
        • If only a single number is set, the SDK will automatically set request and limit to the same value, e.g. resources={"cpu": 1.5} is equivalent to resources={"cpu": {"request": 1.5, "limit": 1.5}}.
      • nvidia.com/gpu: The unit is the number of GPUs, int type is supported.
        • nvidia.com/gpu does not support setting request and limit, only a single number is supported.
      • Note: The resources parameter currently only takes effect on the Server instances. For the Cloud instances, the same can be achieved by selecting the corresponding resource pool when submitting the evaluation task. Standalone instances do not support this feature at all.
    • replicas: (int, optional)
      • The number of replicas to run predict.
      • predict defines a Step, in which there are multiple equivalent Tasks. Each Task runs on a Pod in Cloud/Server instances, and a Thread in Standalone instances.
      • When multiple replicas are specified, they are equivalent and will jointly consume the selected dataset to achieve distributed dataset consumption. It can be understood that a row in the dataset will only be read by one predict replica.
      • The default is 1.
    • batch_size: (int, optional)
      • Batch size for passing data from the dataset into the function.
      • The default is 1.
    • fail_on_error: (bool, optional)
      • Whether to interrupt the entire model evaluation when the decorated function throws an exception. If you expect some "exceptional" data to cause evaluation failures but don't want to interrupt the overall evaluation, you can set fail_on_error=False.
      • The default is True.
    • auto_log: (bool, optional)
      • Whether to automatically log the return values of the function and the input features of the dataset to the results table.
      • The default is True.
    • log_mode: (str, optional)
      • When auto_log=True, you can set log_mode to define logging the return values in plain or pickle format.
      • The default is pickle.
    • log_dataset_features: (List[str], optional)
      • When auto_log=True, you can selectively log certain features from the dataset via this parameter.
      • By default, all features will be logged.
    • needs: (List[Callable], optional)
      • Defines the prerequisites for this task to run, can use the needs syntax to implement DAG.
      • needs accepts functions decorated by @evaluation.predict, @evaluation.evaluate, and @handler.
      • The default is empty, i.e. does not depend on any other tasks.

    Input

    The decorated functions need to define some input parameters to accept dataset data, etc. They contain the following patterns:

    • data:

      • data is a dict type that can read the features of the dataset.
      • When batch_size=1 or batch_size is not set, the label feature can be read through data['label'] or data.label.
      • When batch_size is set to > 1, data is a list.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external:

      • data is a dict type that can read the features of the dataset.
      • external is also a dict, including: index, index_with_dataset, dataset_info, context and dataset_uri keys. The attributes can be used for the further fine-grained processing.
        • index: The index of the dataset row.
        • index_with_dataset: The index with the dataset info.
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo Class.
        • context: starwhale.Context Class.
        • dataset_uri: starwhale.base.uri.resource.Resource Class.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw:

      • data is a dict type that can read the features of the dataset.
      • kw is a dict that contains external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs:

      • The first argument of args list is data.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args:

      • *args does not contain external.
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    Examples

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "memory": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate is a decorator that defines the evaluation process in the Starwhale Model evaluation, similar to the reduce phase in MapReduce. It contains the following core features:

    • On the Server instance, apply for the resources.
    • Read the data recorded in the results table automatically during the predict phase, and pass it into the function as an iterator.
    • The evaluate phase will only run one replica, and cannot define the replicas parameter like the predict phase.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
      • In the common case, it will depend on a function decorated by @evaluation.predict.
    • use_predict_auto_log: (bool, optional)
      • Defaults to True, passes an iterator that can traverse the predict results to the function.

    Input

    • When use_predict_auto_log=True (default), pass an iterator that can traverse the predict results into the function.
      • The iterated object is a dictionary containing two keys: output and input.
        • output is the element returned by the predict stage function.
        • input is the features of the corresponding dataset during the inference process, which is a dictionary type.
    • When use_predict_auto_log=False, do not pass any parameters into the function.

    Examples

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    class Evaluation

    starwhale.Evaluation implements the abstraction for Starwhale Model Evaluation, and can perform operations like logging and scanning for Model Evaluation on Standalone/Server/Cloud instances, to record and retrieve metrics.

    __init__

    __init__ function initializes Evaluation object.

    class Evaluation
    def __init__(self, id: str, project: Project | str) -> None:

    Parameters

    • id: (str, required)
      • The UUID of Model Evaluation that is generated by Starwhale automatically.
    • project: (Project|str, required)
      • Project object or Project URI str.

    Example

    from starwhale import Evaluation

    standalone_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="self")
    server_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="cloud://server/project/starwhale:starwhale")
    cloud_e = Evaluation("2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/project/starwhale:llm-leaderboard")

    from_context

    from_context is a classmethod that obtains the Evaluation object under the current Context. from_context can only take effect under the task runtime environment. Calling this method in a non-task runtime environment will raise a RuntimeError exception, indicating that the Starwhale Context has not been properly set.

    @classmethod
    def from_context(cls) -> Evaluation:

    Example

    from starwhale import Evaluation

    with Evaluation.from_context() as e:
    e.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})

    log

    log is a method that logs evaluation metrics to a specific table, which can then be viewed on the Server/Cloud instance's web page or through the scan method.

    def log(
    self, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    Parameters

    • category: (str, required)
      • The category of the logged metrics, which will be used as the suffix of the Starwhale Datastore table name.
      • Each category corresponds to a Starwhale Datastore table. These tables will be isolated by the evaluation task ID and will not affect each other.
    • id: (str|int, required)
      • The ID of the logged record, unique within the table.
      • For the same table, only str or int can be used as the ID type.
    • metrics: (dict, required)
      • A dict to log metrics in key-value format.
      • Keys are of str type.
      • Values can be constant types like int, float, str, bytes, bool, or compound types like tuple, list, dict. It also supports logging Artifacts types like Starwhale.Image, Starwhale.Video, Starwhale.Audio, Starwhale.Text, Starwhale.Binary.
        • When the value contains dict type, the Starwhale SDK will automatically flatten the dict for better visualization and metric comparison.
        • For example, if metrics is {"test": {"loss": 0.99, "prob": [0.98,0.99]}, "image": [Image, Image]}, it will be stored as {"test/loss": 0.99, "test/prob": [0.98, 0.99], "image/0": Image, "image/1": Image} after flattening.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation.from_context()

    evaluation_store.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log("ppl", "1", {"a": "test", "b": 1})

    scan

    scan is a method that returns an iterator for reading data from certain model evaluation tables.

    def scan(
    self,
    category: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    Parameters

    • category: (str, required)
      • Same meaning as the category parameter in the log method.
    • start: (Any, optional)
      • Start key, if not specified, start from the first data item in the table.
    • end: (Any, optional)
      • End key, if not specified, iterate to the end of the table.
    • keep_none: (bool, optional)
      • Whether to return columns with None values, not returned by default.
    • end_inclusive: (bool, optional)
      • Whether to include the row corresponding to end, not included by default.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    results = [data for data in evaluation_store.scan("label/0")]

    flush

    flush is a method that can immediately flush the metrics logged by the log method to the datastore and oss storage. If the flush method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush(self, category: str, artifacts_flush: bool = True) -> None

    Parameters

    • category: (str, required)
      • Same meaning as the category parameter in the log method.
    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.

    log_result

    log_result is a method that logs evaluation metrics to the results table, equivalent to calling the log method with category set to results. The results table is generally used to store inference results. By default, @starwhale.predict will store the return value of the decorated function in the results table, you can also manually store using log_result.

    def log_result(self, id: t.Union[str, int], metrics: t.Dict[str, t.Any]) -> None:

    Parameters

    • id: (str|int, required)
      • The ID of the record, unique within the results table.
      • For the results table, only str or int can be used as the ID type.
    • metrics: (dict, required)
      • Same definition as the metrics parameter in the log method.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")
    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})

    scan_results

    scan_results is a method that returns an iterator for reading data from the results table.

    def scan_results(
    self,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    Parameters

    • start: (Any, optional)
      • Start key, if not specified, start from the first data item in the table.
      • Same definition as the start parameter in the scan method.
    • end: (Any, optional)
      • End key, if not specified, iterate to the end of the table.
      • Same definition as the end parameter in the scan method.
    • keep_none: (bool, optional)
      • Whether to return columns with None values, not returned by default.
      • Same definition as the keep_none parameter in the scan method.
    • end_inclusive: (bool, optional)
      • Whether to include the row corresponding to end, not included by default.
      • Same definition as the end_inclusive parameter in the scan method.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")

    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})
    results = [data for data in evaluation_store.scan_results()]

    flush_results

    flush_results is a method that can immediately flush the metrics logged by the log_result method to the datastore and oss storage. If the flush_results method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_results(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    log_summary

    log_summary is a method that logs certain metrics to the summary table. The evaluation page on Server/Cloud instances displays data from the summary table.

    Each time it is called, Starwhale will automatically update with the unique ID of this evaluation as the row ID of the table. This function can be called multiple times during one evaluation to update different columns.

    Each project has one summary table. All evaluation tasks under that project will write summary information to this table for easy comparison between evaluations of different models.

    def log_summary(self, *args: t.Any, **kw: t.Any) -> None:

    Same as log method, log_summary will automatically flatten the dict.

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")

    evaluation_store.log_summary(loss=0.99)
    evaluation_store.log_summary(loss=0.99, accuracy=0.99)
    evaluation_store.log_summary({"loss": 0.99, "accuracy": 0.99})

    get_summary

    get_summary is a method that returns the information logged by log_summary.

    def get_summary(self) -> t.Dict:

    flush_summary

    flush_summary is a method that can immediately flush the metrics logged by the log_summary method to the datastore and oss storage. If the flush_summary method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_summary(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    flush_all

    flush_all is a method that can immediately flush the metrics logged by log, log_results, log_summary methods to the datastore and oss storage. If the flush_all method is not called, Evaluation will automatically flush data to storage when it is finally closed.

    def flush_all(self, artifacts_flush: bool = True) -> None:

    Parameters

    • artifacts_flush: (bool, optional)
      • Whether to dump artifact data to blob files and upload them to related storage. Default is True.
      • Same definition as the artifacts_flush parameter in the flush method.

    get_tables

    get_tables is a method that returns the names of all tables generated during model evaluation. Note that this function does not return the summary table name.

    def get_tables(self) -> t.List[str]:

    close

    close is a method to close the Evaluation object. close will automatically flush data to storage when called. Evaluation also implements __enter__ and __exit__ methods, which can simplify manual close calls using with syntax.

    def close(self) -> None:

    Example

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    evaluation_store.log_summary(loss=0.99)
    evaluation_store.close()

    # auto close when the with-context exits.
    with Evaluation.from_context() as e:
    e.log_summary(loss=0.99)

    @handler

    @handler is a decorator that provides the following functionalities:

    • On a Server instance, it requests the required resources to run.
    • It can control the number of replicas.
    • Multiple handlers can form a DAG through dependency relationships to control the execution workflow.
    • It can expose ports externally to run like a web handler.

    @fine_tune, @evaluation.predict and @evaluation.evaluate can be considered applications of @handler in the certain specific areas. @handler is the underlying implementation of these decorators and is more fundamental and flexible.

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.
    • replicas: (int, optional)
      • Consistent with the replicas parameter definition in @evaluation.predict.
    • name: (str, optional)
      • The name displayed for the handler.
      • If not specified, use the decorated function's name.
    • expose: (int, optional)
      • The port exposed externally. When running a web handler, the exposed port needs to be declared.
      • The default is 0, meaning no port is exposed.
      • Currently only one port can be exposed.
    • require_dataset: (bool, optional)
      • Defines whether this handler requires a dataset when running.
      • If require_dataset=True, the user is required to input a dataset when creating an evaluation task on the Server/Cloud instance web page. If require_dataset=False, the user does not need to specify a dataset on the web page.
      • The default is False.

    Examples

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune is a decorator that defines the fine-tuning process for model training.

    Some restrictions and usage suggestions:

    • fine_tune has only one replica.
    • fine_tune requires dataset input.
    • Generally, the dataset is obtained through Context.get_runtime_context() at the start of fine_tune.
    • Generally, at the end of fine_tune, the fine-tuned Starwhale model package is generated through starwhale.model.build, which will be automatically copied to the corresponding evaluation project.

    Parameters

    • resources: (dict, optional)
      • Consistent with the resources parameter definition in @evaluation.predict.
    • needs: (List[Callable], optional)
      • Consistent with the needs parameter definition in @evaluation.predict.

    Examples

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    The @multi_classification decorator uses the sklearn lib to analyze results for multi-classification problems, outputting the confusion matrix, ROC, AUC etc., and writing them to related tables in the Starwhale Datastore.

    When using it, certain requirements are placed on the return value of the decorated function, which should be (label, result) or (label, result, probability_matrix).

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    Parameters

    • confusion_matrix_normalize: (str, optional)
      • Accepts three parameters:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • Whether to calculate the Hamming loss.
      • The default is True.
    • show_cohen_kappa_score: (bool, optional)
      • Whether to calculate the Cohen kappa score.
      • The default is True.
    • show_roc_auc: (bool, optional)
      • Whether to calculate ROC/AUC. To calculate, the function needs to return a (label, result, probability_matrix) tuple, otherwise a (label, result) tuple is sufficient.
      • The default is True.
    • all_labels: (List, optional)
      • Defines all the labels.

    Examples


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int]]:
    label, result = [], []
    return label, result

    PipelineHandler

    The PipelineHandler class provides a default model evaluation workflow definition that requires users to implement the predict and evaluate functions.

    The PipelineHandler is equivalent to using the @evaluation.predict and @evaluation.evaluate decorators together - the usage looks different but the underlying model evaluation process is the same.

    Note that PipelineHandler currently does not support defining resources parameters.

    Users need to implement the following functions:

    • predict: Defines the inference process, equivalent to a function decorated with @evaluation.predict.

    • evaluate: Defines the evaluation process, equivalent to a function decorated with @evaluation.evaluate.

    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    Parameters

    • predict_batch_size: (int, optional)
      • Equivalent to the batch_size parameter in @evaluation.predict.
      • Default is 1.
    • ignore_error: (bool, optional)
      • Equivalent to the fail_on_error parameter in @evaluation.predict.
      • Default is False.
    • predict_auto_log: (bool, optional)
      • Equivalent to the auto_log parameter in @evaluation.predict.
      • Default is True.
    • predict_log_mode: (str, optional)
      • Equivalent to the log_mode parameter in @evaluation.predict.
      • Default is pickle.
    • predict_log_dataset_features: (List[str], optional)
      • Equivalent to the log_dataset_features parameter in @evaluation.predict.
      • Default is None, which records all features.

    PipelineHandler.run Decorator

    The PipelineHandler.run decorator can be used to describe resources for the predict and evaluate methods, supporting definitions of replicas and resources:

    • The PipelineHandler.run decorator can only decorate predict and evaluate methods in subclasses inheriting from PipelineHandler.
    • The predict method can set the replicas parameter. The replicas value for the evaluate method is always 1.
    • The resources parameter is defined and used in the same way as the resources parameter in @evaluation.predict or @evaluation.evaluate.
    • The PipelineHandler.run decorator is optional.
    • The PipelineHandler.run decorator only takes effect on Server and Cloud instances, not Standalone instances that don't support resource definition.
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    Examples

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    The context information passed during model evaluation, including Project, Task ID, etc. The Context content is automatically injected and can be used in the following ways:

    • Inherit the PipelineHandler class and use the self.context object.
    • Get it through Context.get_runtime_context().

    Note that Context can only be used during model evaluation, otherwise the program will throw an exception.

    Currently Context can get the following values:

    • project: str
      • Project name.
    • version: str
      • Unique ID of model evaluation.
    • step: str
      • Step name.
    • total: int
      • Total number of Tasks under the Step.
    • index: int
      • Task index number, starting from 0.
    • dataset_uris: List[str]
      • List of Starwhale dataset URIs.

    Examples


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api is a decorator that provides a simple Web Handler input definition based on Gradio for accepting external requests and returning inference results to the user when launching a Web Service with the swcli model serve command, enabling online evaluation.

    Examples

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    If you want to customize the web service implementation, you can subclass Service and override the serve method.

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.Optional[t.List[str]] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    Notes:

    • Handlers added with PipelineHandler.add_api and the api decorator or Service.api can work together
    • If using a custom Service, you need to instantiate the custom Service class in the model

    Custom Request and Response

    Request and Response are handler preprocessing and postprocessing classes for receiving user requests and returning results. They can be simply understood as pre and post logic for the handler.

    Starwhale provides built-in Request implementations for Dataset types and Json Response. Users can also customize the logic as follows:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req

    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/reference/sdk/job/index.html b/reference/sdk/job/index.html index f04523e2a..4c02b96c1 100644 --- a/reference/sdk/job/index.html +++ b/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Job SDK

    job

    Get a starwhale.Job object through the Job URI parameter, which represents a Job on Standalone/Server/Cloud instances.

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    Parameters

    • uri: (str, required)
      • Job URI format.

    Usage Example

    from starwhale import job

    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")

    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job abstracts Starwhale Job and enables some information retrieval operations on the job.

    list

    list is a classmethod that can list the jobs under a project.

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[List[Job], Dict]:

    Parameters

    • project: (str, optional)
      • Project URI, can be projects on Standalone/Server/Cloud instances.
      • If project is not specified, the project selected by swcli project selected will be used.
    • page_index: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the page number.
        • Default is 1.
        • Page numbers start from 1.
      • Standalone instances do not support paging. This parameter has no effect.
    • page_size: (int, optional)
      • When getting the jobs list from Server/Cloud instances, paging is supported. This parameter specifies the number of jobs returned per page.
        • Default is 20.
      • Standalone instances do not support paging. This parameter has no effect.

    Usage Example

    from starwhale import Job

    # list jobs of current selected project
    jobs, pagination_info = Job.list()

    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")

    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get is a classmethod that gets information about a specific job and returns a Starwhale.Job object. It has the same functionality and parameter definitions as the starwhale.job function.

    Usage Example

    from starwhale import Job

    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")

    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary is a property that returns the data written to the summary table during the job execution, in dict type.

    @property
    def summary(self) -> Dict[str, Any]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.summary)

    tables

    tables is a property that returns the names of tables created during the job execution (not including the summary table, which is created automatically at the project level), in list type.

    @property
    def tables(self) -> List[str]:

    Usage Example

    from starwhale import job

    j1 = job("https://server/job/1")

    print(j1.tables)

    get_table_rows

    get_table_rows is a method that returns records from a data table according to the table name and other parameters, in iterator type.

    def get_table_rows(
    self,
    name: str,
    start: Any = None,
    end: Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> Iterator[Dict[str, Any]]:

    Parameters

    • name: (str, required)
      • Datastore table name. The one of table names obtained through the tables property is ok.
    • start: (Any, optional)
      • The starting ID value of the returned records.
      • Default is None, meaning start from the beginning of the table.
    • end: (Any, optional)
      • The ending ID value of the returned records.
      • Default is None, meaning until the end of the table.
      • If both start and end are None, all records in the table will be returned as an iterator.
    • keep_none: (bool, optional)
      • Whether to return records with None values.
      • Default is False.
    • end_inclusive: (bool, optional)
      • When end is set, whether the iteration includes the end record.
      • Default is False.

    Usage Example

    from starwhale import job

    j = job("local/project/self/job/xm5wnup")

    table_name = j.tables[0]

    for row in j.get_table_rows(table_name):
    print(row)

    rows = list(j.get_table_rows(table_name, start=0, end=100))

    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]

    status

    status is a property that returns the current real-time state of the Job as a string. The possible states are CREATED, READY, PAUSED, RUNNING, CANCELLING, CANCELED, SUCCESS, FAIL, and UNKNOWN.

    @property
    def status(self) -> str:

    create

    create is a classmethod that can create tasks on a Standalone instance or Server/Cloud instance, including tasks for Model Evaluation, Fine-tuning, Online Serving, and Developing. The function returns a Job object.

    • create determines which instance the generated task runs on through the project parameter, including Standalone and Server/Cloud instances.
    • On a Standalone instance, create creates a synchronously executed task.
    • On a Server/Cloud instance, create creates an asynchronously executed task.
    @classmethod
    def create(
    cls,
    project: Project | str,
    model: Resource | str,
    run_handler: str,
    datasets: t.List[str | Resource] | None = None,
    runtime: Resource | str | None = None,
    resource_pool: str = DEFAULT_RESOURCE_POOL,
    ttl: int = 0,
    dev_mode: bool = False,
    dev_mode_password: str = "",
    dataset_head: int = 0,
    overwrite_specs: t.Dict[str, t.Any] | None = None,
    ) -> Job:

    Parameters

    Parameters apply to all instances:

    • project: (Project|str, required)
      • A Project object or Project URI string.
    • model: (Resource|str, required)
      • Model URI string or Resource object of Model type, representing the Starwhale model package to run.
    • run_handler: (str, required)
      • The name of the runnable handler in the Starwhale model package, e.g. the evaluate handler of mnist: mnist.evaluator:MNISTInference.evaluate.
    • datasets: (List[str | Resource], optional)
      • Datasets required for the Starwhale model package to run, not required.

    Parameters only effective for Standalone instances:

    • dataset_head: (int, optional)
      • Generally used for debugging scenarios, only uses the first N data in the dataset for the Starwhale model to consume.

    Parameters only effective for Server/Cloud instances:

    • runtime: (Resource | str, optional)
      • Runtime URI string or Resource object of Runtime type, representing the Starwhale runtime required to run the task.
      • When not specified, it will try to use the built-in runtime of the Starwhale model package.
      • When creating tasks under a Standalone instance, the Python interpreter environment used by the Python script is used as its own runtime. Specifying a runtime via the runtime parameter is not supported. If you need to specify a runtime, you can use the swcli model run command.
    • resource_pool: (str, optional)
      • Specify which resource pool the task runs in, default to the default resource pool.
    • ttl: (int, optional)
      • Maximum lifetime of the task, will be killed after timeout.
      • The unit is seconds.
      • By default, ttl is 0, meaning no timeout limit, and the task will run as expected.
      • When ttl is less than 0, it also means no timeout limit.
    • dev_mode: (bool, optional)
      • Whether to set debug mode. After turning on this mode, you can enter the related environment through VSCode Web.
      • Debug mode is off by default.
    • dev_mode_password: (str, optional)
      • Login password for VSCode Web in debug mode.
      • Default is empty, in which case the task's UUID will be used as the password, which can be obtained via job.info().job.uuid.
    • overwrite_specs: (Dict[str, Any], optional)
      • Support setting the replicas and resources fields of the handler.
      • If empty, use the values set in the corresponding handler of the model package.
      • The key of overwrite_specs is the name of the handler, e.g. the evaluate handler of mnist: mnist.evaluator:MNISTInference.evaluate.
      • The value of overwrite_specs is the set value, in dictionary format, supporting settings for replicas and resources, e.g. {"replicas": 1, "resources": {"memory": "1GiB"}}.

    Examples

    • create a Cloud Instance job
    from starwhale import Job
    project = "https://cloud.starwhale.cn/project/starwhale:public"
    job = Job.create(
    project=project,
    model=f"{project}/model/mnist/version/v0",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=[f"{project}/dataset/mnist/version/v0"],
    runtime=f"{project}/runtime/pytorch",
    overwrite_specs={"mnist.evaluator:MNISTInference.evaluate": {"resources": "4GiB"},
    "mnist.evaluator:MNISTInference.predict": {"resources": "8GiB", "replicas": 10}}
    )
    print(job.status)
    • create a Standalone Instance job
    from starwhale import Job
    job = Job.create(
    project="self",
    model="mnist",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=["mnist"],
    )
    print(job.status)
    - + \ No newline at end of file diff --git a/reference/sdk/model/index.html b/reference/sdk/model/index.html index 54141703d..434559a36 100644 --- a/reference/sdk/model/index.html +++ b/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Model SDK

    model.build

    model.build is a function that can build the Starwhale model, equivalent to the swcli model build command.

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    Parameters

    • modules: (List[str|object], optional)
      • The search modules supports object(function, class or module) or str(example: "to.path.module", "to.path.module:object").
      • If the argument is not specified, the search modules are the imported modules.
    • name: (str, optional)
      • Starwhale Model name.
      • The default is the current work dir (cwd) name.
    • workdir: (str, Pathlib.Path, optional)
      • The path of the rootdir. The default workdir is the current working dir.
      • All files in the workdir will be packaged. If you want to ignore some files, you can add .swignore file in the workdir.
    • project_uri: (str, optional)
      • The project uri of the Starwhale Model.
      • If the argument is not specified, the project_uri is the config value of swcli project select command.
    • desc: (str, optional)
      • The description of the Starwhale Model.
    • remote_project_uri: (str, optional)
      • Project URI of another example instance. After the Starwhale model is built, it will be automatically copied to the remote instance.
    • add_all: (bool, optional)
      • Add all files in the working directory to the model package(excludes python cache files and virtual environment files when disabled).The .swignore file still takes effect.
      • The default value is False.
    • tags: (List[str], optional)
      • The tags for the model version.
      • latest and ^v\d+$ tags are reserved tags.

    Examples

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/reference/sdk/other/index.html b/reference/sdk/other/index.html index d0c232600..7fc2b3eac 100644 --- a/reference/sdk/other/index.html +++ b/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Other SDK

    __version__

    Version of Starwhale Python SDK and swcli, string constant.

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    Initialize Starwhale logger and traceback. The default value is 0.

    • 0: show only errors, traceback only shows 1 frame.
    • 1: show errors + warnings, traceback shows 5 frames.
    • 2: show errors + warnings + info, traceback shows 10 frames.
    • 3: show errors + warnings + info + debug, traceback shows 100 frames.
    • >=4: show errors + warnings + info + debug + trace, traceback shows 1000 frames.
    def init_logger(verbose: int = 0) -> None:

    login

    Log in to a server/cloud instance. It is equivalent to running the swcli instance login command. Log in to the Standalone instance is meaningless.

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    Parameters

    • instance: (str, required)
      • The http url of the server/cloud instance.
    • alias: (str, optional)
      • An alias for the instance to simplify the instance part of the Starwhale URI.
      • If not specified, the hostname part of the instance http url will be used.
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • You can only choose one of username + password or token to login to the instance.

    Examples

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    Log out of a server/cloud instance. It is equivalent to running the swcli instance logout command. Log out of the Standalone instance is meaningless.

    def logout(instance: str) -> None:

    Examples

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/reference/sdk/overview/index.html b/reference/sdk/overview/index.html index 493e845ef..863a274ee 100644 --- a/reference/sdk/overview/index.html +++ b/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Python SDK Overview

    Starwhale provides a series of Python SDKs to help manage datasets, models, evaluations etc. Using the Starwhale Python SDK can make it easier to complete your ML/DL development tasks.

    Classes

    • PipelineHandler: Provides default model evaluation process definition, requires implementation of predict and evaluate methods.
    • Context: Passes context information during model evaluation, including Project, Task ID etc.
    • class Dataset: Starwhale Dataset class.
    • class starwhale.api.service.Service: The base class of online evaluation.
    • class Job: Starwhale Job class.
    • class Evaluation: Starwhale Evaluation class.

    Functions

    • @multi_classification: Decorator for multi-class problems to simplify evaluate result calculation and storage for better evaluation presentation.
    • @handler: Decorator to define a running entity with resource attributes (mem/cpu/gpu). You can control replica count. Handlers can form DAGs through dependencies to control execution flow.
    • @evaluation.predict: Decorator to define inference process in model evaluation, similar to map phase in MapReduce.
    • @evaluation.evaluate: Decorator to define evaluation process in model evaluation, similar to reduce phase in MapReduce.
    • model.build: Build Starwhale model.
    • @fine_tune: Decorator to define model fine-tuning process.
    • init_logger: Set log level, implement 5-level logging.
    • dataset: Get starwhale.Dataset object, by creating new datasets or loading existing datasets.
    • @starwhale.api.service.api: Decorator to provide a simple Web Handler input definition based on Gradio.
    • login: Log in to the server/cloud instance.
    • logout: Log out of the server/cloud instance.
    • job: Get starwhale.Job object by the Job URI.
    • @PipelineHandler.run: Decorator to define the resources for the predict and evaluate methods in PipelineHandler subclasses.

    Data Types

    • COCOObjectAnnotation: Provides COCO format definitions.
    • BoundingBox: Bounding box type, currently in LTWH format - left_x, top_y, width and height.
    • ClassLabel: Describes the number and types of labels.
    • Image: Image type.
    • GrayscaleImage: Grayscale image type, e.g. MNIST digit images, a special case of Image type.
    • Audio: Audio type.
    • Video: Video type.
    • Text: Text type, default utf-8 encoding, for storing large texts.
    • Binary: Binary type, stored in bytes, for storing large binary content.
    • Line: Line type.
    • Point: Point type.
    • Polygon: Polygon type.
    • Link: Link type, for creating remote-link data.
    • MIMEType: Describes multimedia types supported by Starwhale, used in mime_type attribute of Image, Video etc for better Dataset Viewer.

    Other

    • __version__: Version of Starwhale Python SDK and swcli, string constant.

    Further reading

    - + \ No newline at end of file diff --git a/reference/sdk/type/index.html b/reference/sdk/type/index.html index ff2200001..ba221e346 100644 --- a/reference/sdk/type/index.html +++ b/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Data Types

    COCOObjectAnnotation

    It provides definitions following the COCO format.

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    | Parameter    | Description |
    | ------------ | ----------- |
    | id           | Object id, usually a globally incrementing id |
    | image_id     | Image id, usually the id of the image |
    | category_id  | Category id, usually the id of the class in object detection |
    | segmentation | Object contour representation, Polygon (polygon vertices) or RLE format |
    | area         | Object area |
    | bbox         | Represents the bounding box, can be a BoundingBox type or a list of floats |
    | iscrowd      | 0 indicates a single object, 1 indicates two unseparated objects |

    Examples

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    GrayscaleImage provides a grayscale image type. It is a special case of the Image type, for example the digit images in MNIST.

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    | Parameter    | Description |
    | ------------ | ----------- |
    | fp           | Image path, IO object, or file content bytes |
    | display_name | Display name shown in Dataset Viewer |
    | shape        | Image width and height, default channel is 1 |
    | as_mask      | Whether used as a mask image |
    | mask_uri     | URI of the original image for the mask |

    Examples

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage Functions

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    BoundingBox provides a bounding box type, currently in LTWH format:

    • left_x: x-coordinate of left edge
    • top_y: y-coordinate of top edge
    • width: width of bounding box
    • height: height of bounding box

    So it represents the bounding box using the coordinates of its left, top, width and height. This is a common format for specifying bounding boxes in computer vision tasks.

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    | Parameter | Description |
    | --------- | ----------- |
    | x         | x-coordinate of left edge (left_x) |
    | y         | y-coordinate of top edge (top_y) |
    | width     | Width of bounding box |
    | height    | Height of bounding box |

    ClassLabel

    Describe labels.

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    Image Type.

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    | Parameter    | Description |
    | ------------ | ----------- |
    | fp           | Image path, IO object, or file content bytes |
    | display_name | Display name shown in Dataset Viewer |
    | shape        | Image width, height and channels |
    | mime_type    | MIMEType supported types |
    | as_mask      | Whether used as a mask image |
    | mask_uri     | URI of the original image for the mask |

    The main difference from GrayscaleImage is that Image supports multi-channel RGB images by specifying shape as (W, H, C).

    Examples

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image Functions

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    Video type.

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    | Parameter    | Description |
    | ------------ | ----------- |
    | fp           | Video path, IO object, or file content bytes |
    | display_name | Display name shown in Dataset Viewer |
    | mime_type    | MIMEType supported types |

    Examples

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    Audio type.

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    Parameter / Description
    fp: Audio path, IO object, or file content bytes
    display_name: Display name shown in Dataset Viewer
    mime_type: MIMEType supported types

    Examples

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio Functions

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    Text type, the default encode type is utf-8.

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    Parameter / Description
    content: The text content
    encoding: Encoding format of the text

    Examples

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text Functions

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    Binary provides a binary data type, stored as bytes.

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    Parameter / Description
    fp: Path, IO object, or file content bytes
    mime_type: MIMEType supported types

    Binary Functions

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link

    Link provides a link type to create remote-link datasets in Starwhale.

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    Parameter / Description
    uri: URI of the original data, currently supports localFS and S3 protocols
    auth: Link auth information
    offset: Data offset relative to the file pointed to by uri
    size: Data size
    data_type: Actual data type pointed to by the link, currently supports Binary, Image, Text, Audio and Video

    Link.astype

    astype() -> Dict[str, t.Any]

    MIMEType

    MIMEType describes the multimedia types supported by Starwhale, implemented using Python Enum. It is used in the mime_type attribute of Image, Video etc to enable better Dataset Viewer support.

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    Line

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/reference/swcli/dataset/index.html b/reference/swcli/dataset/index.html index 6617200bf..3f031ae32 100644 --- a/reference/swcli/dataset/index.html +++ b/reference/swcli/dataset/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    swcli dataset

    Overview

    swcli [GLOBAL OPTIONS] dataset [OPTIONS] <SUBCOMMAND> [ARGS]...

    The dataset command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • head
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • summary
    • tag

    swcli dataset build

    swcli [GLOBAL OPTIONS] dataset build [OPTIONS]

    Build a Starwhale Dataset. This command only supports building standalone datasets.

    Options

    • Data sources options:
    OptionRequiredTypeDefaultsDescription
    -if or --image or --image-folderNStringBuild dataset from image folder, the folder should contain the image files.
    -af or --audio or --audio-folderNStringBuild dataset from audio folder, the folder should contain the audio files.
    -vf or --video or --video-folderNStringBuild dataset from video folder, the folder should contain the video files.
    -h or --handler or --python-handlerNStringBuild dataset from python executor handler, the handler format is [module path]:[class or func name].
    -f or --yaml or --dataset-yamlNdataset.yaml in cwdBuild dataset from dataset.yaml file. Default uses dataset.yaml in the work directory(cwd).
    -jf or --jsonNStringBuild dataset from json or jsonl file, the json or jsonl file option is a json file path or a http downloaded url. The json content structure should be a list[dict] or tuple[dict].
    -hf or --huggingfaceNStringBuild dataset from huggingface dataset, the huggingface option is a huggingface repo name.
    -c or --csvNStringBuild dataset from csv files. The option is a csv file path, dir path or a http downloaded url. The option can be used multiple times.

    Data source options are mutually exclusive; only one option is accepted. If not set, the swcli dataset build command will use dataset yaml mode to build the dataset with the dataset.yaml in the cwd.

    • Other options:
    OptionRequiredScopeTypeDefaultsDescription
    -pt or --patchone of --patch and --overwriteGlobalBooleanTruePatch mode, only update the changed rows and columns for the existed dataset.
    -ow or --overwriteone of --patch and --overwriteGlobalBooleanFalseOverwrite mode, update records and delete extraneous rows from the existed dataset.
    -n or --nameNGlobalStringDataset name
    -p or --projectNGlobalStringDefault projectProject URI, the default is the current selected project. The dataset will store in the specified project.
    -d or --descNGlobalStringDataset description
    -as or --alignment-sizeNGlobalString128Bswds-bin format dataset: alignment size
    -vs or --volume-sizeNGlobalString64MBswds-bin format dataset: volume size
    -r or --runtimeNGlobalStringRuntime URI
    -w or --workdirNPython Handler ModeStringcwdwork dir to search handler.
    --auto-label/--no-auto-labelNImage/Video/Audio Folder ModeBooleanTrueWhether to auto label by the sub-folder name.
    --field-selectorNJSON File ModeStringThe field from which you would like to extract dataset array items. The field is split by the dot(.) symbol.
    --subsetNHuggingface ModeStringHuggingface dataset subset name. If the subset name is not specified, all subsets will be built.
    --splitNHuggingface ModeStringHuggingface dataset split name. If the split name is not specified, all splits will be built.
    --revisionNHuggingface ModeStringmainVersion of the dataset script to load. Defaults to 'main'. The option value accepts tag name, or branch name, or commit hash.
    --add-hf-info/--no-add-hf-infoNHuggingface ModeBooleanTrueWhether to add huggingface dataset info to the dataset rows, currently support to add subset and split into the dataset rows. Subset uses _hf_subset field name, split uses _hf_split field name.
    --cache/--no-cacheNHuggingface ModeBooleanTrueWhether to use huggingface dataset cache(download + local hf dataset).
    -t or --tagNGlobalStringDataset tags, the option can be used multiple times.
    --encodingNCSV/JSON/JSONL ModeStringfile encoding.
    --dialectNCSV ModeStringexcelThe csv file dialect, the default is excel. Current supports excel, excel-tab and unix formats.
    --delimiterNCSV ModeString,A one-character string used to separate fields for the csv file.
    --quotecharNCSV ModeString"A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters.
    --skipinitialspace/--no-skipinitialspaceNCSV ModeBoolFalseWhether to skip spaces after delimiter for the csv file.
    --strict/--no-strictNCSV ModeBoolFalseWhen True, raise exception Error if the csv is not well formed.

    Examples for dataset building

    #- from dataset.yaml
    swcli dataset build # build dataset from dataset.yaml in the current work directory(pwd)
    swcli dataset build --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, all the involved files are related to the dataset.yaml file.
    swcli dataset build --overwrite --yaml /path/to/dataset.yaml # build dataset from /path/to/dataset.yaml, and overwrite the existed dataset.
    swcli dataset build --tag tag1 --tag tag2

    #- from handler
    swcli dataset build --handler mnist.dataset:iter_mnist_item # build dataset from mnist.dataset:iter_mnist_item handler, the workdir is the current work directory(pwd).
    # build dataset from mnist.dataset:LinkRawDatasetProcessExecutor handler, the workdir is example/mnist
    swcli dataset build --handler mnist.dataset:LinkRawDatasetProcessExecutor --workdir example/mnist

    #- from image folder
    swcli dataset build --image-folder /path/to/image/folder # build dataset from /path/to/image/folder, search all image type files.

    #- from audio folder
    swcli dataset build --audio-folder /path/to/audio/folder # build dataset from /path/to/audio/folder, search all audio type files.

    #- from video folder
    swcli dataset build --video-folder /path/to/video/folder # build dataset from /path/to/video/folder, search all video type files.

    #- from json/jsonl file
    swcli dataset build --json /path/to/example.json
    swcli dataset build --json http://example.com/example.json
    swcli dataset build --json /path/to/example.json --field-selector a.b.c # extract the json_content["a"]["b"]["c"] field from the json file.
    swcli dataset build --name qald9 --json https://raw.githubusercontent.com/ag-sc/QALD/master/9/data/qald-9-test-multilingual.json --field-selector questions
    swcli dataset build --json /path/to/test01.jsonl --json /path/to/test02.jsonl
    swcli dataset build --json https://modelscope.cn/api/v1/datasets/damo/100PoisonMpts/repo\?Revision\=master\&FilePath\=train.jsonl

    #- from huggingface dataset
    swcli dataset build --huggingface mnist
    swcli dataset build -hf mnist --no-cache
    swcli dataset build -hf cais/mmlu --subset anatomy --split auxiliary_train --revision 7456cfb

    #- from csv files
    swcli dataset build --csv /path/to/example.csv
    swcli dataset build --csv /path/to/example.csv --csv /path/to/example2.csv
    swcli dataset build --csv /path/to/csv-dir
    swcli dataset build --csv http://example.com/example.csv
    swcli dataset build --name product-desc-modelscope --csv https://modelscope.cn/api/v1/datasets/lcl193798/product_description_generation/repo\?Revision\=master\&FilePath\=test.csv --encoding=utf-8-sig

    swcli dataset copy

    swcli [GLOBAL OPTIONS] dataset copy [OPTIONS] <SRC> <DEST>

    dataset copy copies from SRC to DEST.

    SRC and DEST are both dataset URIs.

    When copying Starwhale Dataset, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -p or --patchone of --patch and --overwriteBooleanTruePatch mode, only update the changed rows and columns for the remote dataset.
    -o or --overwriteone of --patch and --overwriteBooleanFalseOverwrite mode, update records and delete extraneous rows from the remote dataset.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for dataset copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a new dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp --patch cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with the cloud instance dataset name 'mnist-cloud'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local default project(self) with a dataset name 'mnist-local'
    swcli dataset cp --overwrite cloud://pre-k8s/project/dataset/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud dataset to local project(myproject) with a dataset name 'mnist-local'
    swcli dataset cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with a new dataset name 'mnist-cloud'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local dataset to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli dataset cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local dataset to cloud instance(pre-k8s) mnist project with standalone instance dataset name 'mnist-local'
    swcli dataset cp local/project/myproject/dataset/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli dataset cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1 --force

    swcli dataset diff

    swcli [GLOBAL OPTIONS] dataset diff [OPTIONS] <DATASET VERSION> <DATASET VERSION>

    dataset diff compares the difference between two versions of the same dataset.

    DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.
    swcli dataset head

    swcli [GLOBAL OPTIONS] dataset head [OPTIONS] <DATASET VERSION>

    Print the first n rows of the dataset. DATASET VERSION is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    -n or --rowsNInt5Print the first NUM rows of the dataset.
    -srd or --show-raw-dataNBooleanFalseFetch raw data content from objectstore.
    -st or --show-typesNBooleanFalseshow data types.

    Examples for dataset head

    #- print the first 5 rows of the mnist dataset
    swcli dataset head -n 5 mnist

    #- print the first 10 rows of the mnist(v0 version) dataset and show raw data
    swcli dataset head -n 10 mnist/v0 --show-raw-data

    #- print the data types of the mnist dataset
    swcli dataset head mnist --show-types

    #- print the remote cloud dataset's first 5 rows
    swcli dataset head cloud://cloud-cn/project/test/dataset/mnist -n 5

    #- print the first 5 rows in the json format
    swcli -o json dataset head -n 5 mnist

    swcli dataset history

    swcli [GLOBAL OPTIONS] dataset history [OPTIONS] <DATASET>

    dataset history outputs all history versions of the specified Starwhale Dataset.

    DATASET is a dataset URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli dataset info

    swcli [GLOBAL OPTIONS] dataset info [OPTIONS] <DATASET>

    dataset info outputs detailed information about the specified Starwhale Dataset version.

    DATASET is a dataset URI.

    swcli dataset list

    swcli [GLOBAL OPTIONS] dataset list [OPTIONS]

    dataset list shows all Starwhale Datasets.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include datasets that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Datasets that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of datasets--filter name=mnist
    ownerKey-ValueThe dataset owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli dataset recover

    swcli [GLOBAL OPTIONS] dataset recover [OPTIONS] <DATASET>

    dataset recover recovers previously removed Starwhale Datasets or versions.

    DATASET is a dataset URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Datasets or versions cannot be recovered, nor can those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Dataset or version with the same name or version id.

    swcli dataset remove

    swcli [GLOBAL OPTIONS] dataset remove [OPTIONS] <DATASET>

    dataset remove removes the specified Starwhale Dataset or version.

    DATASET is a dataset URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Datasets or versions can be recovered by swcli dataset recover before garbage collection. Use the --force option to persistently remove a Starwhale Dataset or version.

    Removed Starwhale Datasets or versions can be listed by swcli dataset list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Dataset or version. It can not be recovered.

    swcli dataset summary

    swcli [GLOBAL OPTIONS]  dataset summary <DATASET>

    Show dataset summary. DATASET is a dataset URI.

    swcli dataset tag

    swcli [GLOBAL OPTIONS] dataset tag [OPTIONS] <DATASET> [TAGS]...

    dataset tag attaches a tag to a specified Starwhale Dataset version. At the same time, tag command also supports list and remove tags. The tag can be used in a dataset URI instead of the version id.

    DATASET is a dataset URI.

    Each dataset version can have any number of tags, but duplicated tag names are not allowed in the same dataset.

    dataset tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another dataset version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for dataset tag

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/reference/swcli/index.html b/reference/swcli/index.html index 8b13d0125..d5fa378dc 100644 --- a/reference/swcli/index.html +++ b/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Overview

    Usage

    swcli [OPTIONS] <COMMAND> [ARGS]...
    note

    sw and starwhale are aliases for swcli.

    Global Options

    OptionDescription
    --versionShow the Starwhale Client version
    -v or --verboseShow verbose log, support multi counts for -v args. More -v args, more logs.
    --helpShow the help message.
    caution

    Global options must be put immediately after swcli, and before any command.

    Commands

    - + \ No newline at end of file diff --git a/reference/swcli/instance/index.html b/reference/swcli/instance/index.html index 72295d2c5..ffe2ef9bb 100644 --- a/reference/swcli/instance/index.html +++ b/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    swcli instance

    Overview

    swcli [GLOBAL OPTIONS] instance [OPTIONS] <SUBCOMMAND> [ARGS]

    The instance command includes the following subcommands:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [GLOBAL OPTIONS] instance info [OPTIONS] <INSTANCE>

    instance info outputs detailed information about the specified Starwhale Instance.

    INSTANCE is an instance URI.

    swcli instance list

    swcli [GLOBAL OPTIONS] instance list [OPTIONS]

    instance list shows all Starwhale Instances.

    swcli instance login

    swcli [GLOBAL OPTIONS] instance login [OPTIONS] <INSTANCE>

    instance login connects to a Server/Cloud instance and makes the specified instance default.

    INSTANCE is an instance URI.

    OptionRequiredTypeDefaultsDescription
    --usernameNStringThe login username.
    --passwordNStringThe login password.
    --tokenNStringThe login token.
    --aliasYStringThe alias of the instance. You can use it anywhere that requires an instance URI.

    --username and --password can not be used together with --token.

    swcli instance logout

    swcli [GLOBAL OPTIONS] instance logout [INSTANCE]

    instance logout disconnects from the Server/Cloud instance, and clears information stored in the local storage.

    INSTANCE is an instance URI. If it is omitted, the default instance is used instead.

    swcli instance use

    swcli [GLOBAL OPTIONS] instance use <INSTANCE>

    instance use makes the specified instance the default.

    INSTANCE is an instance URI.

    - + \ No newline at end of file diff --git a/reference/swcli/job/index.html b/reference/swcli/job/index.html index bd71d4a31..562eaf713 100644 --- a/reference/swcli/job/index.html +++ b/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    swcli job

    Overview

    swcli [GLOBAL OPTIONS] job [OPTIONS] <SUBCOMMAND> [ARGS]...

    The job command includes the following subcommands:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [GLOBAL OPTIONS] job cancel [OPTIONS] <JOB>

    job cancel stops the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job info

    swcli [GLOBAL OPTIONS] job info [OPTIONS] <JOB>

    job info outputs detailed information about the specified Starwhale Job.

    JOB is a job URI.

    swcli job list

    swcli [GLOBAL OPTIONS] job list [OPTIONS]

    job list shows all Starwhale Jobs.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --show-removed or -srNBooleanFalseIf true, include jobs that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli job pause

    swcli [GLOBAL OPTIONS] job pause [OPTIONS] <JOB>

    job pause pauses the specified job. Paused jobs can be resumed by job resume. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    From Starwhale's perspective, pause is almost the same as cancel, except that the job reuses the old Job id when resumed. It is job developer's responsibility to save all data periodically and load them when resumed. The job id is usually used as a key of the checkpoint.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, kill the Starwhale Job by force.

    swcli job resume

    swcli [GLOBAL OPTIONS] job resume [OPTIONS] <JOB>

    job resume resumes the specified job. On Standalone instance, this command only takes effect for containerized jobs.

    JOB is a job URI.

    - + \ No newline at end of file diff --git a/reference/swcli/model/index.html b/reference/swcli/model/index.html index 0925f9a5f..d0f76ddce 100644 --- a/reference/swcli/model/index.html +++ b/reference/swcli/model/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    swcli model

    Overview

    swcli [GLOBAL OPTIONS] model [OPTIONS] <SUBCOMMAND> [ARGS]...

    The model command includes the following subcommands:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [GLOBAL OPTIONS] model build [OPTIONS] <WORKDIR>

    model build will put the whole WORKDIR into the model, except files that match patterns defined in .swignore.

    model build will import modules specified by --module to generate the required configurations to run the model. If your module depends on third-party libraries, we strongly recommend you use the --runtime option; otherwise, you need to ensure that the python environment used by swcli has these libraries installed.

    OptionRequiredTypeDefaultsDescription
    --project or -pNStringthe default projectthe project URI
    --model-yaml or -fNString${workdir}/model.yamlmodel yaml path, default use ${workdir}/model.yaml file. model.yaml is optional for model build.
    --module or -mNStringPython modules to be imported during the build process. Starwhale will export model handlers from these modules to the model package. This option supports set multiple times.
    --runtime or -rNStringthe URI of the Starwhale Runtime to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --name or -nNStringmodel package name
    --desc or -dNStringmodel package description
    --package-runtime/--no-package-runtimeNBooleanTrueWhen using the --runtime parameter, by default, the corresponding Starwhale runtime will become the built-in runtime for the Starwhale model. This feature can be disabled with the --no-package-runtime parameter.
    --add-allNBooleanFalseAdd all files in the working directory to the model package(excludes python cache files and virtual environment files when disabled).The .swignore file still takes effect.
    -t or --tagNGlobalString

    Examples for model build

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [GLOBAL OPTIONS] model copy [OPTIONS] <SRC> <DEST>

    model copy copies from SRC to DEST for Starwhale Model sharing.

    SRC and DEST are both model URIs.

    When copying Starwhale Model, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are Starwhale built-in labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringIgnore tags to copy. The option can be used multiple times.

    Examples for model copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [GLOBAL OPTIONS] model diff [OPTIONS] <MODEL VERSION> <MODEL VERSION>

    model diff compares the difference between two versions of the same model.

    MODEL VERSION is a model URI.

    OptionRequiredTypeDefaultsDescription
    --show-detailsNBooleanFalseIf true, outputs the detail information.

    swcli model extract

    swcli [GLOBAL OPTIONS] model extract [OPTIONS] <MODEL> <TARGET_DIR>

    The model extract command can extract a Starwhale model to a specified directory for further customization.

    MODEL is a model URI.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseIf this option is used, it will forcibly overwrite existing extracted model files in the target directory.

    Examples for model extract

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [GLOBAL OPTIONS] model history [OPTIONS] <MODEL>

    model history outputs all history versions of the specified Starwhale Model.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli model info

    swcli [GLOBAL OPTIONS] model info [OPTIONS] <MODEL>

    model info outputs detailed information about the specified Starwhale Model version.

    MODEL is a model URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for model info

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [GLOBAL OPTIONS] model list [OPTIONS]

    model list shows all Starwhale Models.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removedNBooleanFalseIf true, include packages that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Models that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of models--filter name=mnist
    ownerKey-ValueThe model owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli model recover

    swcli [GLOBAL OPTIONS] model recover [OPTIONS] <MODEL>

    model recover recovers previously removed Starwhale Models or versions.

    MODEL is a model URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Models or versions can not be recovered, nor can those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Model or version with the same name or version id.

    swcli model remove

    swcli [GLOBAL OPTIONS] model remove [OPTIONS] <MODEL>

    model remove removes the specified Starwhale Model or version.

    MODEL is a model URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Models or versions can be recovered by swcli model recover before garbage collection. Use the --force option to persistently remove a Starwhale Model or version.

    Removed Starwhale Models or versions can be listed by swcli model list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Model or version. It can not be recovered.

    swcli model run

    swcli [GLOBAL OPTIONS] model run [OPTIONS]

    model run executes a model handler. Model run supports two modes to run: model URI and local development. Model URI mode needs a pre-built Starwhale Model Package. Local development mode only needs the model src dir.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringFor local development mode, the path of model src dir.
    --uri or -uNStringFor model URI mode, the string of model uri.
    --handler or -hNStringRunnable handler index or name, default is None, will use the first handler
    --module or -mNStringThe name of the Python module to import. This parameter can be set multiple times.
    --runtime or -rNStringthe Starwhale Runtime URI to use when running this command. If this option is used, this command will run in an independent python environment specified by the Starwhale Runtime; otherwise, it will run directly in the swcli's current python environment.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model run.
    --run-project or -pNStringDefault projectProject URI, indicates the model run results will be stored in the corresponding project.
    --dataset or -dNStringDataset URI, the Starwhale dataset required for model running. This parameter can be set multiple times.
    --dataset-head or -dhNInteger0[ONLY STANDALONE]For debugging purpose, every prediction task will, at most, consume the first n rows from every dataset. When the value is less than or equal to 0, all samples will be used.
    --in-containerNBooleanFalseUse docker container to run the model. This option is only available for standalone instances. For server and cloud instances, a docker image is always used. If the runtime is a docker image, this option is always implied.
    --forbid-snapshot or -fsNBooleanFalseIn model URI mode, each model run uses a new snapshot directory. Setting this parameter will directly use the model's workdir as the run directory. In local dev mode, this parameter does not take effect, each run is in the --workdir specified directory.
    -- --user-arbitrary-argsNStringSpecify the args you defined in your handlers.

    Examples for model run

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    # --> run with dataset of head 10
    swcli model run --uri mnist --dataset-head 10 --dataset mnist

    swcli model serve

    swcli [GLOBAL OPTIONS] model serve [OPTIONS]

    The model serve command can run the model as a web server, and provide a simple web interaction interface.

    OptionRequiredTypeDefaultsDescription
    --workdir or -wNStringIn local dev mode, specify the directory of the model code.
    --uri or -uNStringIn model URI mode, specify the model URI.
    --runtime or -rNStringThe URI of the Starwhale runtime to use when running this command. If specified, the command will run in the isolated Python environment defined in the Starwhale runtime. Otherwise it will run directly in the current Python environment of swcli.
    --model-yaml or -fNString${MODEL_DIR}/model.yamlThe path to the model.yaml. model.yaml is optional for model serve.
    --module or -mNStringName of the Python module to import. This parameter can be set multiple times.
    --hostNString127.0.0.1The address for the service to listen on.
    --portNInteger8080The port for the service to listen on.

    Examples for model serve

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [GLOBAL OPTIONS] model tag [OPTIONS] <MODEL> [TAGS]...

    model tag attaches a tag to a specified Starwhale Model version. At the same time, tag command also supports list and remove tags. The tag can be used in a model URI instead of the version id.

    MODEL is a model URI.

    Each model version can have any number of tags, but duplicated tag names are not allowed in the same model.

    model tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseremove the tag if true
    --quiet or -qNBooleanFalseignore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another model version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for model tag

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/reference/swcli/project/index.html b/reference/swcli/project/index.html index 39e36abd5..df7435440 100644 --- a/reference/swcli/project/index.html +++ b/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    swcli project

    Overview

    swcli [GLOBAL OPTIONS] project [OPTIONS] <SUBCOMMAND> [ARGS]...

    The project command includes the following subcommands:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [GLOBAL OPTIONS] project create <PROJECT>

    project create creates a new project.

    PROJECT is a project URI.

    swcli project info

    swcli [GLOBAL OPTIONS] project info [OPTIONS] <PROJECT>

    project info outputs detailed information about the specified Starwhale Project.

    PROJECT is a project URI.

    swcli project list

    swcli [GLOBAL OPTIONS] project list [OPTIONS]

    project list shows all Starwhale Projects.

    OptionRequiredTypeDefaultsDescription
    --instanceNStringThe URI of the instance to list. If this option is omitted, use the default instance.
    --show-removedNBooleanFalseIf true, include projects that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.

    swcli project recover

    swcli [GLOBAL OPTIONS] project recover [OPTIONS] <PROJECT>

    project recover recovers previously removed Starwhale Projects.

    PROJECT is a project URI.

    Garbage-collected Starwhale Projects can not be recovered, nor can those removed with the --force option.

    swcli project remove

    swcli [GLOBAL OPTIONS] project remove [OPTIONS] <PROJECT>

    project remove removes the specified Starwhale Project.

    PROJECT is a project URI.

    Removed Starwhale Projects can be recovered by swcli project recover before garbage collection. Use the --force option to persistently remove a Starwhale Project.

    Removed Starwhale Projects can be listed by swcli project list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Project. It can not be recovered.

    swcli project use

    swcli [GLOBAL OPTIONS] project use <PROJECT>

    project use makes the specified project the default. You must log in first to use a project on a Server/Cloud instance.

    - + \ No newline at end of file diff --git a/reference/swcli/runtime/index.html b/reference/swcli/runtime/index.html index 667658254..6e3769b35 100644 --- a/reference/swcli/runtime/index.html +++ b/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    swcli runtime

    Overview

    swcli [GLOBAL OPTIONS] runtime [OPTIONS] <SUBCOMMAND> [ARGS]...

    The runtime command includes the following subcommands:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [GLOBAL OPTIONS] runtime activate [OPTIONS] <RUNTIME>

    Like source venv/bin/activate or conda activate xxx, runtime activate sets up a new python environment according to the settings of the specified runtime. When the current shell is closed or switched to another one, you need to reactivate the runtime. RUNTIME is a Runtime URI.

    If you want to quit the activated runtime environment, please run deactivate in the venv environment or conda deactivate in the conda environment.

    The runtime activate command will build a Python isolated environment and download relevant Python packages according to the definition of the Starwhale runtime when activating the environment for the first time. This process may spend a lot of time.

    swcli runtime build

    swcli [GLOBAL OPTIONS] runtime build [OPTIONS]

    The runtime build command can build a shareable and reproducible runtime environment suitable for ML/DL from various environments or runtime.yaml file.

    Parameters

    • Parameters related to runtime building methods:
    OptionRequiredTypeDefaultsDescription
    -c or --condaNStringFind the corresponding conda environment by conda env name, export Python dependencies to generate Starwhale runtime.
    -cp or --conda-prefixNStringFind the corresponding conda environment by conda env prefix path, export Python dependencies to generate Starwhale runtime.
    -v or --venvNStringFind the corresponding venv environment by venv directory address, export Python dependencies to generate Starwhale runtime.
    -s or --shellNStringExport Python dependencies according to current shell environment to generate Starwhale runtime.
    -y or --yamlNruntime.yaml in cwd directoryBuild Starwhale runtime according to user-defined runtime.yaml.
    -d or --dockerNStringUse the docker image as Starwhale runtime.

    The parameters for runtime building methods are mutually exclusive, only one method can be specified. If not specified, it will use --yaml method to read runtime.yaml in cwd directory to build Starwhale runtime.

    • Other parameters:
    OptionRequiredScopeTypeDefaultsDescription
    --project or -pNGlobalStringDefault projectProject URI
    -del or --disable-env-lockNruntime.yaml modeBooleanFalseWhether to install dependencies in runtime.yaml and lock the version information of related dependencies. The dependencies will be locked by default.
    -nc or --no-cacheNruntime.yaml modeBooleanFalseWhether to delete the isolated environment and install related dependencies from scratch. By default dependencies will be installed in the existing isolated environment.
    --cudaNconda/venv/shell modeChoice[11.3/11.4/11.5/11.6/11.7/]CUDA version, CUDA will not be used by default.
    --cudnnNconda/venv/shell modeChoice[8/]cuDNN version, cuDNN will not be used by default.
    --archNconda/venv/shell modeChoice[amd64/arm64/noarch]noarchArchitecture
    -dpo or --dump-pip-optionsNGlobalBooleanFalseDump pip config options from the ~/.pip/pip.conf file.
    -dcc or --dump-condarcNGlobalBooleanFalseDump conda config from the ~/.condarc file.
    -t or --tagNGlobalStringRuntime tags, the option can be used multiple times.

    Examples for Starwhale Runtime building

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [GLOBAL OPTIONS] runtime copy [OPTIONS] <SRC> <DEST>

    runtime copy copies from SRC to DEST. SRC and DEST are both Runtime URIs.

    When copying Starwhale Runtime, all custom user-defined labels will be copied by default. You can use the --ignore-tag parameter to ignore certain labels. In addition, the latest and ^v\d+$ labels are built-in Starwhale system labels that are only used within the instance itself and will not be copied to other instances.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, DEST will be overwritten if it exists. In addition, if the labels carried during duplication have already been used by other versions, this parameter can be used to forcibly update the labels to this version.
    -i or --ignore-tagNStringTags to ignore when copying. The option can be used multiple times.

    Examples for Starwhale Runtime copy

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [GLOBAL OPTIONS] runtime dockerize [OPTIONS] <RUNTIME>

    runtime dockerize generates a docker image based on the specified runtime. Starwhale uses docker buildx to create the image. Docker 19.03 or later is required to run this command.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --tag or -tNStringThe tag of the docker image. This option can be repeated multiple times.
    --pushNBooleanFalseIf true, push the image to the docker registry
    --platformNStringamd64The target platform, can be either amd64 or arm64. This option can be repeated multiple times to create a multi-platform image.

    swcli runtime extract

    swcli [Global Options] runtime extract [Options] <RUNTIME>

    Starwhale runtimes use the compressed packages to distribute. The runtime extract command can be used to extract the runtime package for further customization and modification.

    OptionRequiredTypeDefaultDescription
    --force or -fNBooleanFalseWhether to delete and re-extract if there is already an extracted Starwhale runtime in the target directory.
    --target-dirNStringCustom extraction directory. If not specified, it will be extracted to the default Starwhale runtime workdir. The command log will show the directory location.

    swcli runtime history

    swcli [GLOBAL OPTIONS] runtime history [OPTIONS] <RUNTIME>

    runtime history outputs all history versions of the specified Starwhale Runtime.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.

    swcli runtime info

    swcli [GLOBAL OPTIONS] runtime info [OPTIONS] <RUNTIME>

    runtime info outputs detailed information about a specified Starwhale Runtime version.

    RUNTIME is a Runtime URI.

    OptionRequiredTypeDefaultsDescription
    --output-filter or -ofNChoice of [basic/runtime_yaml/manifest/lock/all]basicFilter the output content. Only standalone instance supports this option.

    Examples for Starwhale Runtime info

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [GLOBAL OPTIONS] runtime list [OPTIONS]

    runtime list shows all Starwhale Runtimes.

    OptionRequiredTypeDefaultsDescription
    --projectNStringThe URI of the project to list. Use the default project if not specified.
    --fullnameNBooleanFalseShow the full version name. Only the first 12 characters are shown if this option is false.
    --show-removed or -srNBooleanFalseIf true, include runtimes that are removed but not garbage collected.
    --pageNInteger1The starting page number. Server and cloud instances only.
    --sizeNInteger20The number of items in one page. Server and cloud instances only.
    --filter or -flNStringShow only Starwhale Runtimes that match specified filters. This option can be used multiple times in one command.
    FilterTypeDescriptionExample
    nameKey-ValueThe name prefix of runtimes--filter name=pytorch
    ownerKey-ValueThe runtime owner name--filter owner=starwhale
    latestFlagIf specified, it shows only the latest version.--filter latest

    swcli runtime recover

    swcli [GLOBAL OPTIONS] runtime recover [OPTIONS] <RUNTIME>

    runtime recover can recover previously removed Starwhale Runtimes or versions.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all removed versions are recovered.

    Garbage-collected Starwhale Runtimes or versions can not be recovered, nor can those removed with the --force option.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, overwrite the Starwhale Runtime or version with the same name or version id.

    swcli runtime remove

    swcli [GLOBAL OPTIONS] runtime remove [OPTIONS] <RUNTIME>

    runtime remove removes the specified Starwhale Runtime or version.

    RUNTIME is a Runtime URI. If the version part of the URI is omitted, all versions are removed.

    Removed Starwhale Runtimes or versions can be recovered by swcli runtime recover before garbage collection. Use the --force option to persistently remove a Starwhale Runtime or version.

    Removed Starwhale Runtimes or versions can be listed by swcli runtime list --show-removed.

    OptionRequiredTypeDefaultsDescription
    --force or -fNBooleanFalseIf true, persistently delete the Starwhale Runtime or version. It can not be recovered.

    swcli runtime tag

    swcli [GLOBAL OPTIONS] runtime tag [OPTIONS] <RUNTIME> [TAGS]...

    runtime tag attaches a tag to a specified Starwhale Runtime version. At the same time, tag command also supports list and remove tags. The tag can be used in a runtime URI instead of the version id.

    RUNTIME is a Runtime URI.

    Each runtime version can have any number of tags, but duplicated tag names are not allowed in the same runtime.

    runtime tag only works for the Standalone Instance.

    OptionRequiredTypeDefaultsDescription
    --remove or -rNBooleanFalseRemove the tag if true
    --quiet or -qNBooleanFalseIgnore errors, for example, removing tags that do not exist.
    --force-add or -fNBooleanFalseWhen adding labels to server/cloud instances, if the label is already used by another runtime version, an error will be prompted. In this case, you can force an update using the --force-add parameter.

    Examples for runtime tag

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/reference/swcli/utilities/index.html b/reference/swcli/utilities/index.html index 3784b64bc..3e662b661 100644 --- a/reference/swcli/utilities/index.html +++ b/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Utility Commands

    swcli gc

    swcli [GLOBAL OPTIONS] gc [OPTIONS]

    gc clears removed projects, models, datasets, and runtimes according to the internal garbage collection policy.

    OptionRequiredTypeDefaultsDescription
    --dry-runNBooleanFalseIf true, outputs objects to be removed instead of clearing them.
    --yesNBooleanFalseBypass confirmation prompts.

    swcli check

    swcli [GLOBAL OPTIONS] check

    Check if the external dependencies of the swcli command meet the requirements. Currently mainly checks Docker and Conda.

    swcli completion install

    swcli [GLOBAL OPTIONS] completion install <SHELL_NAME>

    Install autocompletion for swcli commands. Currently supports bash, zsh and fish. If SHELL_NAME is not specified, it will try to automatically detect the current shell type.

    swcli config edit

    swcli [GLOBAL OPTIONS] config edit

    Edit the Starwhale configuration file at ~/.config/starwhale/config.yaml.

    swcli ui

    swcli [GLOBAL OPTIONS] ui <INSTANCE>

    Open the web page for the corresponding instance.

    - + \ No newline at end of file diff --git a/runtime/index.html b/runtime/index.html index fb88167ef..2138e44c5 100644 --- a/runtime/index.html +++ b/runtime/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Runtime

    overview

    Overview

    Starwhale Runtime aims to provide a reproducible and sharable running environment for python programs. You can easily share your working environment with your teammates or outsiders, and vice versa. Furthermore, you can run your programs on Starwhale Server or Starwhale Cloud without bothering with the dependencies.

    Starwhale works well with virtualenv, conda, and docker. If you are using one of them, it is straightforward to create a Starwhale Runtime based on your current environment.

    Multiple Starwhale Runtimes on your local machine can be switched freely with one command. You can work on different projects without messing up the environment. Starwhale Runtime consists of two parts: the base image and the dependencies.

    The base image

    The base is a docker image with Python, CUDA, and cuDNN installed. Starwhale provides various base images for you to choose from; see the following list:

    • Computer system architecture:
      • X86 (amd64)
      • Arm (aarch64)
    • Operating system:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7
    - + \ No newline at end of file diff --git a/runtime/yaml/index.html b/runtime/yaml/index.html index a3469be54..e01bef4de 100644 --- a/runtime/yaml/index.html +++ b/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    The runtime.yaml Specification

    runtime.yaml is the configuration file that defines the properties of the Starwhale Runtime. runtime.yaml is required for the yaml mode of the swcli runtime build command.

    Examples

    The simplest example

    dependencies:
    - pip:
    - numpy
    name: simple-test

    Define a Starwhale Runtime that uses venv as the Python virtual environment for package isolation, and installs the numpy dependency.

    The llama2 example

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    The full definition example

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be installed with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/server/guides/server_admin/index.html b/server/guides/server_admin/index.html index 1b7c4f662..b1b887ffa 100644 --- a/server/guides/server_admin/index.html +++ b/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    Version: 0.6.4

    Controller Admin Settings

    Superuser Password Reset

    In case you forget the superuser's password, you can use the SQL below to reset the password to abcd1234:

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    After that, you could login to the console and then change the password to what you really want.

    System Settings

    You can customize the system to make it easier to use by leveraging System Settings. Here is an example:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    Image Registry

    Tasks dispatched by the server are based on docker images. Pulling these images could be slow if your internet is not working well. Starwhale Server supports the custom image registries, includes dockerSetting.registryForPush and dockerSetting.registryForPull.

    Resource Pool

    The resourcePoolSetting allows you to manage your cluster in a group manner. It is currently implemented by K8S nodeSelector, you could label your machines in K8S cluster and make them a resourcePool in Starwhale.

    Remote Storage

    The storageSetting allows you to manage the storages the server could access.

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # files bigger than 10MB will use multipart upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # files bigger than 10MB will use multipart upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # files bigger than 10MB will use multipart upload
    hugeFilePartSize: 5242880 # part size in bytes (5MB) for multipart upload

    Every storageSetting item has a corresponding implementation of the StorageAccessService interface. Starwhale has four built-in implementations:

    • StorageAccessServiceAliyun matches type in (aliyun,oss)
    • StorageAccessServiceMinio matches type in (minio)
    • StorageAccessServiceS3 matches type in (s3)
    • StorageAccessServiceFile matches type in (fs, file)

    Each of the implementations has different requirements for tokens. endpoint is required when type in (aliyun,minio); region is required when type is s3 and endpoint is empty. The fs/file type requires tokens to contain rootDir and serviceProvider entries. Please refer to the code for more details.

    - + \ No newline at end of file diff --git a/server/index.html b/server/index.html index ff3da7522..95beec24b 100644 --- a/server/index.html +++ b/server/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/server/installation/docker-compose/index.html b/server/installation/docker-compose/index.html index 5f01ea094..eeba30d65 100644 --- a/server/installation/docker-compose/index.html +++ b/server/installation/docker-compose/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Install Starwhale Server with Docker Compose

    Prerequisites

    Usage

    Start up the server

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    GLOBAL_IP is the IP address of the Controller, which must be accessible to all swcli clients, both inside docker containers and on other user machines.

    compose.yaml contains the Starwhale Controller/MySQL/MinIO services. You can create a compose.override.yaml file which, as its name implies, contains configuration overrides for compose.yaml. The available configurations are specified here

    - + \ No newline at end of file diff --git a/server/installation/docker/index.html b/server/installation/docker/index.html index 5fb54f450..e4d9935ad 100644 --- a/server/installation/docker/index.html +++ b/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Install Starwhale Server with Docker

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage to save datasets, models, and others.

    Please make sure pods on the Kubernetes cluster can access the port exposed by the Starwhale Server installation.

    Prepare an env file for Docker

    Starwhale Server can be configured by environment variables.

    An env file template for Docker is here. You may create your own env file by modifying the template.

    Prepare a kubeconfig file [Optional][SW_SCHEDULER=k8s]

    The kubeconfig file is used for accessing the Kubernetes cluster. For more information about kubeconfig files, see the Official Kubernetes Documentation.

    If you have a local kubectl command-line tool installed, you can run kubectl config view to see your current configuration.

    Run the Docker image

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<path to your kubeconfig file>,destination=/root/.kube/config,readonly \
    --env-file <path to your env file> \
    ghcr.io/star-whale/server:0.5.6

    For users in the mainland of China, use docker image: docker-registry.starwhale.cn/star-whale/server.

    - + \ No newline at end of file diff --git a/server/installation/helm-charts/index.html b/server/installation/helm-charts/index.html index c75d4400c..77a301894 100644 --- a/server/installation/helm-charts/index.html +++ b/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Install Starwhale Server with Helm

    Prerequisites

    • A running Kubernetes 1.19+ cluster to run tasks.
    • A running MySQL 8.0+ instance to store metadata.
    • A S3-compatible object storage system to save datasets, models, and others.
    • Helm 3.2.0+.

    The Starwhale Helm Charts includes MySQL and MinIO as dependencies. If you do not have your own MySQL instance or any S3-compatible object storage available, use the Helm Charts to install. Please check Installation Options to learn how to install Starwhale Server with MySQL and MinIO.

    Create a service account on Kubernetes for Starwhale Server

    If Kubernetes RBAC is enabled (In Kubernetes 1.6+, RBAC is enabled by default), Starwhale Server cannot work properly unless it is started by a service account with at least the following permissions:

    ResourceAPI GroupGetListWatchCreateDelete
    jobsbatchYYYYY
    podscoreYYY
    nodescoreYYY
    events""Y

    Example:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    Downloading Starwhale Helm Charts

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    Installing Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    If you have a local kubectl command-line tool installed, you can run kubectl get pods -n starwhale to check if all pods are running.

    Updating Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    Uninstalling Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/server/installation/index.html b/server/installation/index.html index f1e409c0d..d295ce271 100644 --- a/server/installation/index.html +++ b/server/installation/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/server/installation/minikube/index.html b/server/installation/minikube/index.html index ea7bb4c2a..41da4a705 100644 --- a/server/installation/minikube/index.html +++ b/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Install Starwhale Server with Minikube

    Prerequisites

    Starting Minikube

    minikube start --addons ingress

    For users in the mainland of China, please run the following commands:

    minikube start --kubernetes-version=1.25.3 --image-repository=docker-registry.starwhale.cn/minikube --base-image=docker-registry.starwhale.cn/minikube/k8s-minikube/kicbase:v0.0.42

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,IngressController=ingress-nginx/controller:v1.9.4"

    The docker registry docker-registry.starwhale.cn/minikube currently only caches the images for Kubernetes 1.25.3. Alternatively, you can use the Aliyun mirror:

    minikube start --image-mirror-country=cn

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=kube-webhook-certgen:v20231011-8b53cabe0,IngressController=nginx-ingress-controller:v1.9.4" --registries="KubeWebhookCertgenPatch=registry.cn-hangzhou.aliyuncs.com/google_containers,KubeWebhookCertgenCreate=registry.cn-hangzhou.aliyuncs.com/google_containers,IngressController=registry.cn-hangzhou.aliyuncs.com/google_containers"

    If there is no kubectl bin in your machine, you may use minikube kubectl or alias kubectl="minikube kubectl --" alias command.

    Installing Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    For users in the mainland of China, use values.minikube.cn.yaml:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    After the installation is successful, the following prompt message appears:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    Checking Starwhale Server status

    Keep checking the minikube service status until all deployments are running (it may take 3~5 minutes):

    kubectl get deployments -n starwhale
    NAMEREADYUP-TO-DATEAVAILABLEAGE
    controller1/1115m
    minio1/1115m
    mysql1/1115m

    Visiting for local

    Make the Starwhale controller accessible locally with the following command:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    Then you can visit http://controller.starwhale.svc in your local web browser.

    Visiting for others

    • Step 1: in the Starwhale Server machine

      for temporary use with socat command:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      When you kill the socat process, the share access will be blocked. iptables maybe a better choice for long-term use.

    • Step 2: in the other machines

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/server/installation/starwhale_env/index.html b/server/installation/starwhale_env/index.html index cb149f966..0542b0088 100644 --- a/server/installation/starwhale_env/index.html +++ b/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Server Environment Example

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################

    # The cache directory for the WAL files. Point it to a mounted volume or host path with enough space.
    # If not set, the WAL files will be saved in the docker runtime layer, and will be lost when the container is restarted.
    SW_DATASTORE_WAL_LOCAL_CACHE_DIR=
    - + \ No newline at end of file diff --git a/server/project/index.html b/server/project/index.html index 28e67673d..32fbf38ac 100644 --- a/server/project/index.html +++ b/server/project/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    How to Organize and Manage Resources with Starwhale Projects

    Project is the basic unit for organizing and managing resources (such as models, datasets, runtime environments, etc.). You can create and manage projects based on your needs. For example, you can create projects by business team, product line, or models. One user can create and participate in one or more projects.

    Project type

    There are two types of projects:

    • Private project: The project (and related resources in the project) is only visible to project members with permission. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    • Public project: The project (and related resources in the project) is visible to all Starwhale users. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    Create a project

    1. Click the Create button in the upper right corner of the project list page;
    2. Enter a name for the project. Pay attention to avoiding duplicate names. For more information, please see Names in Starwhale
    3. Select the Project Type, which is defaulted to private project and can be selected as public according to needs;
    4. Fill in the description content;
    5. To finish, Click the Submit button.

    Edit a project

    The name, privacy and description of a project can be edited.

    1. Go to the project list page and find the project that needs to be edited by searching for the project name, then click the Edit Project button;
    2. Edit the items that need to be edited;
    3. Click Submit to save the edited content;
    4. If you're editing multiple projects, repeat steps 1 through 3.

    View a project

    My projects

    On the project list page, only my projects are displayed by default. My projects are the projects in which the current user participates as a project member or project owner.

    Project sorting

    On the project list page, all projects are supported to be sorted by "Recently visited", "Project creation time from new to old", and "Project creation time from old to new", which can be selected according to your needs.

    Delete a project

    Once a project is deleted, all related resources (such as datasets, models, runtimes, evaluations, etc.) will be deleted and cannot be restored.

    1. Enter the project list page and search for the project name to find the project that needs to be deleted. Hover your mouse over the project you want to delete, then click the Delete button;
    2. Follow the prompts, enter the relevant information, click Confirm to delete the project, or click Cancel to cancel the deletion;
    3. If you are deleting multiple projects, repeat the above steps.

    Manage project member

    Only users with the admin role can assign people to the project. The project owner defaulted to having the project owner role.

    Add a member

    1. Click Manage Members to go to the project member list page;
    2. Click the Add Member button in the upper right corner.
    3. Enter the Username you want to add, select a project role for the user in the project.
    4. Click submit to complete.
    5. If you're adding multiple members, repeat steps 1 through 4.

    Remove a member

    1. On the project list page or project overview tab, click Manage Members to go to the project member list page.
    2. Search for the username you want to delete, then click the Delete button.
    3. Click Yes to delete the user from this project, click No to cancel the deletion.
    4. If you're removing multiple members, repeat steps 1 through 3.

    Edit a member's role

    1. Hover your mouse over the project you want to edit, then click Manage Members to go to the project member list page.
    2. Find the username you want to adjust through searching, click the Project Role drop-down menu, and select a new project role. For more information on roles, please take a look at Roles and permissions in Starwhale.
    - + \ No newline at end of file diff --git a/swcli/config/index.html b/swcli/config/index.html index 146e77882..5bb2227a3 100644 --- a/swcli/config/index.html +++ b/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Configuration

    Standalone Instance is installed on the user's laptop or development server, providing isolation at the level of Linux/macOS users. Users can install the Starwhale Python package using the pip command and execute any swcli command. After that, they can view their Starwhale configuration in ~/.config/starwhale/config.yaml. In the vast majority of cases, users do not need to manually modify the config.yaml file.

    The ~/.config/starwhale/config.yaml file has permissions set to 0o600 to ensure security, as it contains sensitive information such as encryption keys. Users are advised not to change the file permissions. You can customize your swcli by swcli config edit:

    swcli config edit

    config.yaml example

    The typical config.yaml file is as follows:

    • The default instance is local.
    • cloud-cn/cloud-k8s/pre-k8s are the server/cloud instances, local is the standalone instance.
    • The local storage root directory for the Standalone Instance is set to /home/liutianwei/.starwhale.
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml definition

    ParameterDescriptionTypeDefault ValueRequired
    current_instanceThe name of the default instance to use. It is usually set using the swcli instance select command.StringselfYes
    instancesManaged instances, including Standalone, Server and Cloud Instances. There must be at least one Standalone Instance named "local" and one or more Server/Cloud Instances. You can log in to a new instance with swcli instance login and log out from an instance with swcli instance logout.DictStandalone Instance named "local"Yes
    instances.{instance-alias-name}.sw_tokenLogin token for Server/Cloud Instances. It is only effective for Server/Cloud Instances. Subsequent swcli operations on Server/Cloud Instances will use this token. Note that tokens have an expiration time, typically set to one month, which can be configured within the Server/Cloud Instance.StringCloud - Yes, Standalone - No
    instances.{instance-alias-name}.typeType of the instance, currently can only be "cloud" or "standalone".Choice[string]Yes
    instances.{instance-alias-name}.uriFor Server/Cloud Instances, the URI is an http/https address. For Standalone Instances, the URI is set to "local".StringYes
    instances.{instance-alias-name}.user_nameUser's nameStringYes
    instances.{instance-alias-name}.current_projectDefault Project under the current instance. It will be used to fill the "project" field in the URI representation by default. You can set it using the swcli project select command.StringYes
    instances.{instance-alias-name}.user_roleUser's role.StringnormalYes
    instances.{instance-alias-name}.updated_atThe last updated time for this instance configuration.Time format stringYes
    storageSettings related to local storage.DictYes
    storage.rootThe root directory for Standalone Instance's local storage. Typically, if there is insufficient space in the home directory and you manually move data files to another location, you can modify this field.String~/.starwhaleYes
    versionThe version of config.yaml, currently only supports 2.0.String2.0Yes

    You could put starwhale.Link to your assets while the URI in the Link could be whatever you need (currently only S3-like and HTTP schemes are implemented), such as s3://10.131.0.1:9000/users/path. However, Links may need to be authenticated, so you can configure the auth info in link_auths.

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    Items in link_auths will match the uri in Links automatically. An s3-typed link_auth matches Links by looking up the bucket and endpoint.

    - + \ No newline at end of file diff --git a/swcli/index.html b/swcli/index.html index b451844b0..ffa2555bc 100644 --- a/swcli/index.html +++ b/swcli/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Client (swcli) User Guide

    The Starwhale Client (swcli) is a command-line tool that enables you to interact with Starwhale instances. You can use swcli to complete almost all tasks in Starwhale. swcli is written in pure python3 (requires Python 3.7 ~ 3.11) so that it can be easily installed by the pip command. Currently, swcli only supports Linux and macOS; Windows support is coming soon.

    - + \ No newline at end of file diff --git a/swcli/installation/index.html b/swcli/installation/index.html index 3ac7ba23a..59897f261 100644 --- a/swcli/installation/index.html +++ b/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Installation Guide

    We can use swcli to complete all tasks for Starwhale Instances. swcli is written in pure python3, which can be installed easily by the pip command. Here are some installation tips that can help you get a cleaner, unambiguous, no dependency conflicts swcli python environment.

    Installing Advice

    DO NOT install Starwhale in your system's global Python environment. It will cause a python dependency conflict problem.

    Prerequisites

    • Python 3.7 ~ 3.11
    • Linux or macOS
    • Conda (optional)

    In the Ubuntu system, you can run the following commands:

    sudo apt-get install python3 python3-venv python3-pip

    #If you want to install multi python versions
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli works on macOS. If you run into issues with the default system Python3 on macOS, try installing Python3 through the homebrew:

    brew install python3

    Install swcli

    Install with venv

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    Install with conda

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    👏 Now, you can use swcli in the global environment.

    Install for the special scenarios

    # for Audio processing
    python -m pip install starwhale[audio]

    # for Image processing
    python -m pip install starwhale[pillow]

    # for swcli model server command
    python -m pip install starwhale[server]

    # for built-in online serving
    python -m pip install starwhale[online-serve]

    # install all dependencies
    python -m pip install starwhale[all]

    Update swcli

    #for venv
    python3 -m pip install --upgrade starwhale

    #for conda
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    Uninstall swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/swcli/swignore/index.html b/swcli/swignore/index.html index bef41a491..40dc9358d 100644 --- a/swcli/swignore/index.html +++ b/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    About the .swignore file

    The .swignore file is similar to .gitignore, .dockerignore, and other files used to define ignored files or dirs. The .swignore file is mainly used in the Starwhale Model building process. By default, the swcli model build command or starwhale.model.build() Python SDK will traverse all files in the specified directory and automatically exclude certain known files or directories that are not suitable for inclusion in the model package.

    PATTERN FORMAT

    • Each line in a swignore file specifies a pattern, which matches files and directories.
    • A blank line matches no files, so it can serve as a separator for readability.
    • An asterisk * matches anything except a slash.
    • A line starting with # serves as a comment.
    • Supports wildcard expressions, for example: *.jpg, *.png.

    Auto Ignored files or dirs

    If you want to include the auto ignored files or dirs, you can add --add-all for the swcli model build command.

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv installation dir
    • conda installation dir

    Example

    Here is the .swignore file used in the MNIST example:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/swcli/uri/index.html b/swcli/uri/index.html index 8b2ade1d7..39e9abe04 100644 --- a/swcli/uri/index.html +++ b/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    Version: 0.6.4

    Starwhale Resources URI

    tip

    Resource URI is widely used in Starwhale client commands. The URI can refer to a resource in the local instance or any other resource in a remote instance. In this way, the Starwhale client can easily manipulate any resource.

    concepts-org.jpg

    Instance URI

    Instance URI can be either:

    • local: standalone instance.
    • [http(s)://]<hostname or ip>[:<port>]: cloud instance with HTTP address.
    • [cloud://]<cloud alias>: cloud or server instance with an alias name, which can be configured in the instance login phase.
    caution

    "local" is different from "localhost". The former means the local standalone instance without a controller, while the latter implies a controller listening at the default port 8082 on the localhost.

    Example:

    # log in Starwhale Cloud; the alias is swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.ai --alias swcloud

    # copy a model from the local instance to the cloud instance
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # copy a runtime to a Starwhale Server instance: http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    Project URI

    Project URI is in the format [<Instance URI>/project/]<project name>. If the instance URI is not specified, use the current instance instead.

    Example:

    swcli project select self   # select the self project in the current instance
    swcli project info local/project/self # inspect self project info in the local instance

    Model/Dataset/Runtime URI

    • Model URI: [<Project URI>/model/]<model name>[/version/<version id|tag>].
    • Dataset URI: [<Project URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • Runtime URI: [<Project URI>/runtime/]<runtime name>[/version/<version id|tag>].
    tip
    • swcli supports human-friendly short version id. You can type the first few characters of the version id, provided it is at least four characters long and unambiguous. However, the recover command must use the complete version id.
    • If the project URI is not specified, the default project will be used.
    • You can always use the version tag instead of the version id.

    Example:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # inspect model info, model name: mnist, version:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # short version
    swcli model info mnist # inspect mnist model info
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # use the default latest tag

    Job URI

    • format: [<Project URI>/job/]<job id>.
    • If the project URI is not specified, the default project will be used.

    Example:

    swcli job info mezdayjzge3w   # Inspect mezdayjzge3w version in default instance and default project
    swcli job info local/project/self/job/mezday # Inspect the local instance, self project, with short job id:mezday

    The default instance

    When the instance part of a project URI is omitted, the default instance is used instead. The default instance is the one selected by the swcli instance login or swcli instance select command.

    The default project

    When the project parts of Model/Dataset/Runtime/Evaluation URIs are omitted, the default project is used instead. The default project is the one selected by the swcli project select command.

    - + \ No newline at end of file diff --git a/zh/0.5.10/cloud/billing/bills/index.html b/zh/0.5.10/cloud/billing/bills/index.html index 2f94ea3e4..e0721ef3a 100644 --- a/zh/0.5.10/cloud/billing/bills/index.html +++ b/zh/0.5.10/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    账单明细

    账单明细查看

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击“全部账单”,可跳转查看全部账单明细。

    image

    image

    账单明细字段说明

    • 账单编号:账单的唯一标识
    • 资源:用户所使用的各类资源
    • 资源明细:使用资源运行的作业
    • 消费时间:账单开始时间至账单结束时间
    • 计费项:用户所用的产品或服务所含的具体的计费项目
    • 单价:产品或服务的单价
    • 单价单位:产品或服务单价的单位
    • 用量:产品或服务的使用量
    • 用量单位:产品或服务使用量的单位
    • 状态:账单的支付状态,分为:未结清、已结清、未结算
    - + \ No newline at end of file diff --git a/zh/0.5.10/cloud/billing/index.html b/zh/0.5.10/cloud/billing/index.html index dfe47bfe1..bc5f03b38 100644 --- a/zh/0.5.10/cloud/billing/index.html +++ b/zh/0.5.10/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    产品计费概述

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    计费说明

    计费项

    Starwhale 根据您选购的资源规格(CPU、GPU、内存)及使用时长进行计费。

    计费方式

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    按量付费主要按照资源计费周期计费,在每个结算周期生成账单并从账户中扣除相应费用。创建 Job 时,需要确定计算资源配置。

    请确保您在 Job 运行期间可用余额充足,如果在 Job 运行过程中,您的账户余额不足,会导致 Job 无法完成并按照已运行时长收费。

    开通要求

    按照按量付费创建 Job 前,您的 Starwhale 账户可用余额不得小于一个计费周期。

    说明:账户可用余额=充值金额+代金券金额-已消费金额-已退款金额-已冻结金额

    计费周期

    每5分钟为一个计费周期,不足5分钟则按5分钟计算,按照 Job 运行时长结算。

    计费时长

    从 Job 开始运行时计费,到 Job 运行结束后结束计费

    账单明细

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击“全部账单”,可跳转查看全部账单明细。详细操作流程请参见账单明细

    欠费说明

    如果账号内存在欠费账单,您无法继续使用计算资源。尽快充值结清欠费账单后可继续使用。

    查看欠费金额

    1 登录账户中心

    2 在账户概览可查看欠费金额

    退款说明

    现金余额支持退款

    需要登录账户中心,点击账户管理>充值订单,可退款的充值订单会在操作列显示退款按钮。点击可发起退款申请,详细操作流程请参见申请退款

    - + \ No newline at end of file diff --git a/zh/0.5.10/cloud/billing/recharge/index.html b/zh/0.5.10/cloud/billing/recharge/index.html index 18fdf65f9..57ac18bd9 100644 --- a/zh/0.5.10/cloud/billing/recharge/index.html +++ b/zh/0.5.10/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    充值和退款

    充值渠道

    Starwhale目前支持通过微信渠道进行充值。

    充值操作步骤

    操作路径:

    1 登录账户中心,点击“去充值”,可跳转至充值页面。

    image

    2 选择或者输入充值金额,充值金额需要大于50元,同时注意支付渠道的限额(超过限额会无法支付成功)。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    充值订单

    查看充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,可在账户概览页面查看最近充值订单,点击“全部订单”,可跳转查看全部充值订单。

    image

    image

    继续支付充值订单

    如您在充值页面,点击“开始充值”后,因某些原因没有支付,可在30分钟内继续支付。

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要继续支付的订单,点击“继续支付”。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    取消充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要取消的订单,点击“取消”,弹出确认弹窗后,点击“确定”,可取消充值订单。

    image

    - + \ No newline at end of file diff --git a/zh/0.5.10/cloud/billing/refund/index.html b/zh/0.5.10/cloud/billing/refund/index.html index bd58c2a61..7fe95effb 100644 --- a/zh/0.5.10/cloud/billing/refund/index.html +++ b/zh/0.5.10/cloud/billing/refund/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.5.10

    账户退款

    申请退款

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要退款的订单,点击“退款”,填写退款原因,确认退款金额,可申请退款。

    提示:退款款项将原路退回,即通过微信支付的订单会退回到支付时使用的微信。

    image

    image

    image

    image

    - + \ No newline at end of file diff --git a/zh/0.5.10/cloud/billing/voucher/index.html b/zh/0.5.10/cloud/billing/voucher/index.html index bf6c97499..301035a9c 100644 --- a/zh/0.5.10/cloud/billing/voucher/index.html +++ b/zh/0.5.10/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    代金券

    什么是代金券

    代金券是starwhale以虚拟券形式给予客户的资金类权益,可用于抵扣运行时所使用资源的费用。

    如何查看我的代金券?

    登录Starwhale,进入“账户中心>代金券” 可查看代金券的编号,面值,余额,状态等信息。

    image

    点击右侧操作列“使用明细”打开“代金券使用明细页”,查看该代金券的交易时间、编号、支出等抵扣详细记录。

    image

    如何使用代金券?

    代金券适用于抵扣消费,如果您的Starwhale账户内有代金券,系统会优先抵扣代金券金额,代金券余额为0后会抵扣充值余额。

    - + \ No newline at end of file diff --git a/zh/0.5.10/cloud/index.html b/zh/0.5.10/cloud/index.html index 80b66108d..4f90a3a94 100644 --- a/zh/0.5.10/cloud/index.html +++ b/zh/0.5.10/cloud/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale Cloud 用户指南

    Starwhale Cloud 是托管在公有云上的服务,由 Starwhale 团队负责运维,访问地址是 https://cloud.starwhale.cn

    - + \ No newline at end of file diff --git a/zh/0.5.10/community/contribute/index.html b/zh/0.5.10/community/contribute/index.html index 6a9cbda96..3d0850933 100644 --- a/zh/0.5.10/community/contribute/index.html +++ b/zh/0.5.10/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 开源贡献指南

    参与贡献

    Starwhale 非常欢迎来自开源社区的贡献,包括但不限于以下方式:

    • 描述使用过程中的遇到的问题
    • 提交Feature Request
    • 参与Slack和Github Issues讨论
    • 参与Code Review
    • 改进文档和示例程序
    • 修复程序Bug
    • 增加Test Case
    • 改进代码的可读性
    • 开发新的Features
    • 编写Enhancement Proposal

    可以通过以下方式参与开发者社区,获取最新信息和联系Starwhale开发者:

    Starwhale社区使用Github Issues来跟踪问题和管理新特性的开发。可以选择"good first issue"或"help wanted"标签的issue,作为参与开发Starwhale的起点。

    Starwhale资源列表

    代码基本结构

    核心目录组织及功能说明如下:

    • client:swcli和Python SDK的实现,使用Python3编写,对应Starwhale Standalone Instance的所有功能。
      • api:Python SDK的接口定义和实现。
      • cli:Command Line Interface的入口点。
      • base:Python 端的一些基础抽象。
      • core:Starwhale 核心概念的实现,包括Dataset、Model、Runtime、Project、Job、Evaluation等。
      • utils:Python 端的一些工具函数。
    • console:前端的实现,使用React + TypeScript编写,对应Starwhale Cloud Instance的Web UI。
    • server:Starwhale Controller的实现,使用Java编写,对应Starwhale Cloud Instance的后端API。
    • docker:Helm Charts,绝大多数Docker Image的Dockerfile等。
    • docs:Starwhale官方文档。
    • example:示例程序,包含MNIST等例子。
    • scripts:一些Bash和Python脚本,用来进行E2E测试和软件发布等。

    Fork&Clone Starwhale仓库

    您需要fork Starwhale仓库代码并clone到本机,

    搭建针对Standalone Instance的本地开发环境

    Standalone Instance采用Python编写,当要修改Python SDK和swcli时,需要进行相应的环境搭建。

    Standalone本地开发环境前置条件

    • OS:Linux或macOS
    • Python:3.7~3.11
    • Docker:>=19.03 (非必须,当调试dockerize、生成docker image或采用docker为载体运行模型任务时需要)
    • Python隔离环境:Python venv 或 virtualenv 或 conda等都可以,用来构建一个隔离的Python环境

    从源码进行安装

    基于上一步clone到本地的仓库目录:starwhale,并进入到client子目录:

    cd starwhale/client

    使用Conda创建一个Starwhale开发环境,或者使用venv/virtualenv等创建:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    安装Client包及依赖到starwhale-dev环境中:

    make install-sw
    make install-dev-req

    输入swcli --version命令,观察是否安装成功,开发环境的swcli版本是 0.0.0.dev0

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ which swcli
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    本地修改代码

    现在可以对Starwhale代码进行修改,不需要重复安装(make install-sw命令)就能在当前starwhale-dev环境下测试cli或sdk。Starwhale Repo中设置了 .editorconfig 文件,大部分IDE或代码编辑器会自动支持该文件的导入,采用统一的缩进设置。

    执行代码检查和测试

    在 starwhale 目录中操作,会执行单元测试、client的e2e测试、mypy检查、flake8检查和isort检查等。

    make client-all-check

    搭建针对Cloud Instance的本地开发环境

    Cloud Instance的后端采用Java编写,前端采用React+TypeScript编写,可以按需搭建相应的开发环境。

    搭建前端Console开发环境

    搭建后端Server开发环境

    • 开发语言:Java
    • 项目构建工具:Maven
    • 开发框架:Spring Boot+Mybatis
    • 测试框架:Junit5(其中mock框架为mockito,断言部分使用hamcrest,数据库、web服务等模拟使用Testcontainers)
    • 代码检查:使用maven插件 maven-checkstyle-plugin

    Server开发环境前置条件

    • OS:Linux、macOS或Windows
    • JDK: >=11
    • Docker:>=19.03
    • Maven:>=3.8.1
    • Mysql:>=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(如果没有k8s集群,可以使用Minikube作为开发调试时的备选方案)

    修改代码并增加单测

    现在可以进入到相应模块,对server端的代码进行修改、调整。其中业务功能代码位置为src/main/java,单元测试目录为src/test/java。

    执行代码检查和单元测试

    cd starwhale/server
    mvn clean package

    本地部署服务

    • 前置服务

      • Minikube(可选,无k8s集群时可使用此服务,安装方式可见:Minikube)

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • 打包server程序

      若部署server端时,需要把前端同时部署上,可先执行前端部分的构建命令,然后执行'mvn clean package',则会自动将已编译好的前端文件打包进来。

      使用如下命令对程序进行打包:

      cd starwhale/server
      mvn clean package
    • 指定server启动所需的环境变量

      # Minio相关配置
      export SW_STORAGE_ENDPOINT=http://${Minio IP,默认为127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,默认为starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,默认为starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,默认为starwhale}
      export SW_STORAGE_REGION=${Minio region,默认为local}
      # kubernetes配置
      export KUBECONFIG=${.kube配置文件所在路径}\.kube\config

      export SW_INSTANCE_URI=http://${Server服务所在机器IP}:8082
      # Mysql相关配置
      export SW_METADATA_STORAGE_IP=${Mysql IP,默认为127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,默认为3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,默认为starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,默认为starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,默认为starwhale}
    • 部署server服务

      使用IDE或如下方式部署均可。

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • 功能调试

      这里有两种方式对修改的功能进行调试:

      • 使用swagger-ui进行接口调试,访问 /swagger-ui/index.html找到对应的api即可。
      • 或直接在ui访问,进行相应功能的调试(前提是打包时已经按说明将前端代码进行了提前构建)
    - + \ No newline at end of file diff --git a/zh/0.5.10/concepts/index.html b/zh/0.5.10/concepts/index.html index b500a6ec8..ce3ef283f 100644 --- a/zh/0.5.10/concepts/index.html +++ b/zh/0.5.10/concepts/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/0.5.10/concepts/names/index.html b/zh/0.5.10/concepts/names/index.html index 6393d5c19..964306fc2 100644 --- a/zh/0.5.10/concepts/names/index.html +++ b/zh/0.5.10/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale中的命名规则

    下文的命名是指对Starwhale中的项目、模型、数据集、运行时以及版本标签进行命名。

    名称限制

    • 名称不区分大小写。
    • 名称必须仅由大小写字母“A-Z a-z”、数字“0-9”、连字符“-”、点“.”和下划线“_”组成。
    • 名称应始终以字母或“_”字符开头。
    • 名称的最大长度为80。

    名称唯一性要求

    • 资源名称在其所影响范围内必须是唯一的。例如,项目名称在实例中必须是唯一的,模型名称在其所在项目中必须是唯一的。
    • 同一个项目下同类资源必须使用不同的名称,包括那些已删除的资源。 例如,项目“Apple”不能有两个名为“Alice”的模型,即使其中一个已经被删除。
    • 不同种类的资源可以有相同的名称。 例如,一个项目、一个模型和一个数据集可以同时被命名为“Alice”。
    • 不同项目的资源可以具有相同的名称。 例如,“Apple”项目中的模型和“Banana”项目中的模型可以具有相同的名称“Alice”。
    • 已经被垃圾回收的资源名称可以重复使用。 例如,将项目“Apple”中名称为“Alice”的模型移除并进行垃圾回收后,该项目可以有一个新的同名模型“Alice”。
    - + \ No newline at end of file diff --git a/zh/0.5.10/concepts/project/index.html b/zh/0.5.10/concepts/project/index.html index bff404457..e8aca0fb0 100644 --- a/zh/0.5.10/concepts/project/index.html +++ b/zh/0.5.10/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale中的项目

    “项目”是组织不同资源(如模型、数据集等)的基本单位。您可以将项目用于不同的目的。例如,您可以为数据科学家团队、产品线或特定模型创建项目。用户通常在日常工作中会参与一个或多个项目。

    Starwhale Server/Cloud 项目按账号分组。Starwhale Standalone 没有帐号概念。所以您不会在 Starwhale Standalone 项目中看到任何帐号前缀。Starwhale Server/Cloud项目可以是“公共”或“私有”。公共项目意味着同一实例上的所有用户在默认情况下都自动成为该项目的“访客”角色。有关角色的更多信息,请参阅Starwhale中的角色和权限

    Starwhale Standalone会自动创建一个“self”项目并将其配置为默认项目。

    - + \ No newline at end of file diff --git a/zh/0.5.10/concepts/roles-permissions/index.html b/zh/0.5.10/concepts/roles-permissions/index.html index 294279a22..bba6acec8 100644 --- a/zh/0.5.10/concepts/roles-permissions/index.html +++ b/zh/0.5.10/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale中的角色和权限

    角色用于为用户分配权限。只有Starwhale Server/Cloud有角色和权限,Starwhale Standalone没有相应概念。系统会自动创建一个管理员角色并分配给默认用户“starwhale”。一些敏感操作只能由具有管理员角色的用户执行,例如在Starwhale Server中创建新的账号。

    每个项目具有三类角色:

    • 管理员Admin - 项目管理员可以读写项目数据并将项目角色分配给用户。
    • 维护者Maintainer - 项目维护者可以读写项目数据。
    • 访客Guest - 项目访客只能读取项目数据。
    动作管理员Admin维护者Maintainer访客Guest
    管理项目成员
    编辑项目
    查看项目
    创建评价
    删除评价
    查看评价
    创建数据集
    更新数据集
    删除数据集
    查看数据集
    创建模型
    更新模型
    删除模型
    查看模型
    创建运行时
    更新运行时
    删除运行时
    查看运行时

    创建项目的用户成为第一个项目管理员。他可以在这之后将角色分配给其他用户。

    - + \ No newline at end of file diff --git a/zh/0.5.10/concepts/versioning/index.html b/zh/0.5.10/concepts/versioning/index.html index e27bde449..ed0fffeb4 100644 --- a/zh/0.5.10/concepts/versioning/index.html +++ b/zh/0.5.10/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale中的资源版本控制

    • Starwhale管理所有模型、数据集和运行时的历史记录。对特定资源的每次更新都会附加一个新版本的历史记录。
    • 版本由version id标识。version id是由 Starwhale自动生成的随机字符串,并按其创建时间排序。
    • 版本可以有标签。Starwhale使用版本标签来提供人性化的版本表示。默认情况下,Starwhale会为每个版本附加一个默认标签。默认标记是字母“v”后跟一个数字。对于每个版本化的资源,第一个版本标签始终标记为“v0”,第二个版本标记为“v1”,依此类推。有一个特殊的标签“latest”总是指向最新的版本。删除版本后,将不会重复使用其默认标签。例如,有一个带有标签“v0、v1、v2”的模型。 删除“v2”后,标签将为“v0、v1”。 接下来一个标签将是“v3”而不是“v2”。您可以将自己定义的标签附加到任何版本并随时删除它们。
    • Starwhale使用线性历史,不提供分支。
    • Starwhale资源无法真正回滚。当要恢复某个历史版本时,Starwhale会复制该版本数据并将其作为新版本追加到历史记录的末尾。您可以手动删除和恢复历史版本。
    - + \ No newline at end of file diff --git a/zh/0.5.10/dataset/index.html b/zh/0.5.10/dataset/index.html index 8ff05719c..ac570bac5 100644 --- a/zh/0.5.10/dataset/index.html +++ b/zh/0.5.10/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 数据集

    设计概述

    Starwhale Dataset 定位

    Starwhale Dataset 包含数据构建、数据加载和数据可视化三个核心阶段,是一款面向ML/DL领域的数据管理工具。Starwhale Dataset 能直接使用 Starwhale Runtime 构建的环境,能被 Starwhale Model 和 Starwhale Evaluation 无缝集成,是 Starwhale MLOps 工具链的重要组成部分。

    根据 Machine Learning Operations (MLOps): Overview, Definition, and Architecture 对MLOps Roles的分类,Starwhale Dataset的三个阶段针对用户群体如下:

    • 数据构建:Data Engineer、Data Scientist
    • 数据加载:Data Scientist、ML Developer
    • 数据可视化:Data Engineer、Data Scientist、ML Developer

    mlops-users

    核心功能

    • 高效加载:数据集原始文件存储在 OSS 或 NAS 等外部存储上,使用时按需加载,不需要数据落盘。
    • 简单构建:既支持从 Image/Video/Audio 目录、json文件和 Huggingface 数据集等来源一键构建数据集,又支持编写 Python 代码构建完全自定义的数据集。
    • 版本管理:可以进行版本追踪、数据追加等操作,并通过内部抽象的 ObjectStore,避免数据重复存储。
    • 数据集分发:通过 swcli dataset copy 命令,实现 Standalone 实例和 Cloud/Server 实例的双向数据集分享。
    • 数据可视化:Cloud/Server 实例的 Web 界面中可以对数据集提供多维度、多类型的数据呈现。
    • 制品存储:Standalone 实例能存储本地构建或分发的 swds 系列文件,Cloud/Server 实例使用对象存储提供集中式的 swds 制品存储。
    • Starwhale无缝集成:Starwhale Dataset 能使用 Starwhale Runtime 构建的运行环境构建数据集。Starwhale Evaluation 和 Starwhale Model 直接通过 --dataset 参数指定数据集,就能完成自动数据加载,便于进行推理、模型评测等任务。

    关键元素

    • swds 虚拟包文件:swds 与 swmp、swrt 不一样,不是一个打包的单一文件,而是一个虚拟的概念,具体指的是一个目录,是 Starwhale 数据集某个版本包含的数据集相关的文件,包括 _manifest.yaml, dataset.yaml, 数据集构建的Python脚本和数据文件的链接等。可以通过 swcli dataset info 命令查看swds所在目录。swds 是Starwhale Dataset 的简写。

    swds-tree.png

    • swcli dataset 命令行:一组dataset相关的命令,包括构建、分发和管理等功能,具体说明参考CLI Reference
    • dataset.yaml 配置文件:描述数据集的构建过程,可以完全省略,通过 swcli dataset build 参数指定,可以认为 dataset.yaml 是build命令行参数的一种配置文件表示方式。swcli dataset build 参数优先级高于 dataset.yaml
    • Dataset Python SDK:包括数据构建、数据加载和若干预定义的数据类型,具体说明参考Python SDK
    • 数据集构建的 Python 脚本:使用 Starwhale Python SDK 编写的用来构建数据集的一系列脚本。

    最佳实践

    Starwhale Dataset 的构建是独立进行的,如果编写构建脚本时需要引入第三方库,那么使用 Starwhale Runtime 可以简化 Python 的依赖管理,能保证数据集的构建可复现。Starwhale 平台会尽可能多的内建开源数据集,让用户 copy 下来数据集后能立即使用。

    命令行分组

    Starwhale Dataset 命令行从使用阶段的角度上,可以划分如下:

    • 构建阶段
      • swcli dataset build
    • 可视化阶段
      • swcli dataset diff
      • swcli dataset head
    • 分发阶段
      • swcli dataset copy
    • 基本管理
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    目前 Cloud/Server 实例中 Web UI 可以对数据集进行可视化展示,目前只有使用 Python SDK 的DataType 才能被前端正确的解释,映射关系如下:

    • Image:展示缩略图、放大图、MASK类型图片,支持 image/png、image/jpeg、image/webp、image/svg+xml、image/gif、image/apng、image/avif 格式。
    • Audio:展示为音频wave图,可播放,支持 audio/mp3 和 audio/wav 格式。
    • Video:展示为视频,可播放,支持 video/mp4、video/avi 和 video/webm 格式。
    • GrayscaleImage:展示灰度图,支持 x/grayscale 格式。
    • Text:展示文本,支持 text/plain 格式,可以设置编码格式,默认为utf-8。
    • Binary和Bytes:暂不支持展示。
    • Link:上述几种多媒体类型都支持指定link作为存储路径。

    Starwhale Dataset 数据格式

    数据集由多个行组成,每行称为一个样本,每个样本包含若干 features ,features 是一个类 dict 结构,对key和value有一些简单的限制[L]

    • dict的key必须为str类型。
    • dict的value必须是 int/float/bool/str/bytes/dict/list/tuple 等 Python 的基本类型,或者 Starwhale 内置的数据类型
    • 不同样本的数据相同key的value,不需要保持同一类型。
    • 如果value是list或者tuple,其元素的数据类型必须一致。
    • value为dict时,其限制等同于限制[L]

    例子:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    文件类数据的处理方式

    Starwhale Dataset 对文件类型的数据进行了特殊处理,如果您不关心 Starwhale 的实现方式,可以忽略本小节。

    根据实际使用场景,Starwhale Dataset 对基类为 starwhale.BaseArtifact 的文件类数据有两种处理方式:

    • swds-bin: Starwhale 以自己的二进制格式 (swds-bin) 将数据合并成若干个大文件,能高效的进行索引、切片和加载。
    • remote-link: 满足用户的原始数据存放在某些外部存储上,比如 OSS 或 NAS 等,原始数据较多,不方便搬迁或者已经用一些内部的数据集实现进行封装过,那么只需要在数据中使用 link,就能建立索引。

    在同一个Starwhale 数据集中,可以同时包含两种类型的数据。

    - + \ No newline at end of file diff --git a/zh/0.5.10/dataset/yaml/index.html b/zh/0.5.10/dataset/yaml/index.html index d5ff7e7b2..d36300c5a 100644 --- a/zh/0.5.10/dataset/yaml/index.html +++ b/zh/0.5.10/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    dataset.yaml 使用指南

    提示

    dataset.yaml 对于 swcli dataset build 构建数据集的过程是非必要的。

    Starwhale Dataset 构建的时候使用 dataset.yaml,若省略 dataset.yaml,则可以在 swcli dataset build 命令行参数中描述相关配置,可以认为 dataset.yamlbuild 命令行的配置文件化表述。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Dataset的名字String
    handler为一个函数,返回一个Generator或一个可迭代的对象或一个实现 __iter__ 方法的类,格式为 {module 路径}:{类名函数名}String
    desc数据集描述信息String""
    versiondataset.yaml格式版本,目前仅支持填写 1.0String1.0
    attr数据集构建参数Dict
    attr.volume_sizeswds-bin格式的数据集每个data文件的大小。当写数字时,单位bytes;也可以是数字+单位格式,如64M, 1GB等Int或Str64MB
    attr.alignment_sizeswds-bin格式的数据集每个数据块的数据alignment大小,如果设置alignment_size为4k,数据块大小为7.9K,则会补齐0.1K的空数据,让数据块为alignment_size的整数倍,提升page size等读取效率Integer或String128

    使用示例

    最简示例

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    helloworld的数据集,使用dataset.yaml所在目录中dataset.py文件中的 ExampleProcessExecutor 类进行数据构建。

    MNIST数据集构建示例

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor

    desc: MNIST data and label test dataset

    attr:
    alignment_size: 1k
    volume_size: 4M

    handler为generator function的例子

    dataset.yaml 内容:

    name: helloworld
    handler: dataset:iter_item

    dataset.py 内容:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}

    本例中,handler为一个generator function,Starwhale SDK根据首个yield出来的元素为非Starwhale.Link类型,等同于继承 starwhale.SWDSBinBuildExecutor 类。

    - + \ No newline at end of file diff --git a/zh/0.5.10/evaluation/heterogeneous/node-able/index.html b/zh/0.5.10/evaluation/heterogeneous/node-able/index.html index 5450f7f8c..2ba80b353 100644 --- a/zh/0.5.10/evaluation/heterogeneous/node-able/index.html +++ b/zh/0.5.10/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -24,7 +24,7 @@ 参考 链接

    v0.13.0-rc.1 为例

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    注意: 此操作会在所有的 K8s 节点中运行 NVIDIA 的 device plugin 插件, 如果之前配置过, 则会被更新, 请谨慎评估使用的镜像版本

  • 确认 GPU 可以在集群中发现和使用 参考下边命令, 查看 Jetson 节点的 Capacity 中有 nvidia.com/gpu, GPU 即被 K8s 集群正常识别

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • 制作和使用自定义镜像

    文章前面提到的 l4t-jetpack 镜像可以满足我们一般的使用, 如果我们需要自己定制更加精简或者更多功能的镜像, 可以基于 l4t-base 来制作 相关 Dockerfile 可以参考 Starwhale为mnist制作的镜像

    - + \ No newline at end of file diff --git a/zh/0.5.10/evaluation/heterogeneous/virtual-node/index.html b/zh/0.5.10/evaluation/heterogeneous/virtual-node/index.html index c4f311b43..1ae9b5e93 100644 --- a/zh/0.5.10/evaluation/heterogeneous/virtual-node/index.html +++ b/zh/0.5.10/evaluation/heterogeneous/virtual-node/index.html @@ -10,7 +10,7 @@ - + @@ -19,7 +19,7 @@ 此方案被各云厂商广泛用于 serverless 容器集群方案, 比如阿里云的 ASK, Amazon 的 AWS Fargate 等.

    原理

    virtual kubelet 框架将 kubelet 对于 Node 的相关接口进行实现, 只需要简单的配置即可模拟一个节点. 我们只需要实现 PodLifecycleHandler 接口即可支持:

    • 创建, 更新, 删除 Pod
    • 获取 Pod 状态
    • 获取 Container 日志

    将设备加入集群

    如果我们的设备由于资源限制等情况无法作为 K8s 的一个节点进行服务, 那么我们可以通过使用 virtual kubelet 模拟一个代理节点的方式对这些设备进行管理, Starwhale Controller 和设备的控制流如下


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    virtual kubelet 将 Starwhale Controller 下发下来的 Pod 编排信息转化为对设备的控制行为, 比如 ssh 到设备上执行一段命令, 或者通过 USB 或者串口发送一段消息等.

    下面是使用 virtual kubelet 的方式来对一个未加入集群的可以 ssh 的设备进行控制的示例

    1. 准备证书
    • 创建文件 csr.conf (后续 openssl 命令通过 -config ./csr.conf 引用), 内容如下
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name
    [req_distinguished_name]
    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names
    [alt_names]
    IP = 1.2.3.4
    • 生成证书
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • 提交证书
    cat vklet.csr| base64 | tr -d "\n" # 输出内容作为 csr.yaml 文件中 spec.request 的内容

    csr.yaml

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************************************************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
     kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    现在我们得到了 vklet-cert.pem

    • 编译 virtual kubelet
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    创建节点的配置文件 mock.json

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    启动 virtual kubelet

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig

    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    至此, 我们使用 virtual kubelet 模拟了一个 100 core + 100G 内存的节点.

    • 增加 PodLifecycleHandler 的实现, 将 Pod 编排中的重要信息转化为 ssh 命令执行, 并且收集日志待 Starwhale Controller 收集

    具体实现可参考 ssh executor

    - + \ No newline at end of file diff --git a/zh/0.5.10/evaluation/index.html b/zh/0.5.10/evaluation/index.html index d04d3ac52..cffb5080e 100644 --- a/zh/0.5.10/evaluation/index.html +++ b/zh/0.5.10/evaluation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 模型评测

    设计概述

    Starwhale Evaluation 定位

    Starwhale Evaluation 目标是对模型评测进行全流程管理,包括创建 Job、分发 Task、查看模型评测报告和基本管理等。Starwhale Evaluation 是 Starwhale 构建的 MLOps 工具链使用 Starwhale Model、Starwhale Dataset、Starwhale Runtime 三个基础元素,在模型评测这个场景上的具体应用,后续还会包含 Starwhale Model Serving、Starwhale Training 等应用场景。

    核心功能

    • 可视化展示:swcli 和 Web UI 都提供对模型评测结果的可视化展示,支持多个结果的对比等功能,同时用户可以自定义记录评测中间过程。
    • 多场景适配:不管是在笔记本的单机环境,还是在分布式服务器集群环境,都能使用统一的命令、Python脚本、制品和操作方法进行模型评测,满足不同算力、不同数据量的外部环境要求。
    • Starwhale无缝集成:使用Starwhale Runtime提供的运行环境,将 Starwhale Dataset 作为数据输入,在 Starwhale Model 中运行模型评测任务,不管是在 swcli、Python SDK 还是 Cloud/Server 实例 Web UI中,都能简单的进行配置。

    关键元素

    • swcli model run 命令行: 能够完成模型的批量、离线式评测。
    • swcli model serve 命令行: 能够完成模型的在线评测。

    最佳实践

    命令行分组

    从完成 Starwhale Evaluation 全流程任务的角度,可以将所涉及的命令分组如下:

    • 基础准备阶段
      • swcli dataset build 或 Starwhale Dataset Python SDK
      • swcli model build 或 Starwhale Model Python SDK
      • swcli runtime build
    • 评测阶段
      • swcli model run
      • swcli model serve
    • 结果展示阶段
      • swcli job info
    • 基本管理
      • swcli job list
      • swcli job remove
      • swcli job recover

    job-step-task 抽象

    • job: 一次模型评测任务就是一个 job,一个 job 包含一个或多个 step
    • step: step 对应评测过程中的某个阶段。使用 PipelineHandler 的默认评测过程,step 就是 predict 和 evaluate;用户自定义的评测过程,step 就是使用 @handler, @evaluation.predict, @evaluation.evaluate 修饰的函数。step 之间可以有依赖关系,形成一个DAG。一个 step 包含一个或多个 task。同一 step 中的不同 task,执行逻辑是一致的,只是输入参数不同,常见做法是将数据集分割成若干部分,然后传入每个task 中,task 可以并行执行。
    • task: task 是最终运行的实体。在 Cloud/Server 实例中,一个 task 就是一个Pod的container; 在Standalone 实例中,一个 task 就是一个 Python Thread。

    job-step-task 的抽象是实现 Starwhale Evaluation 分布式运行的基础。

    - + \ No newline at end of file diff --git a/zh/0.5.10/faq/index.html b/zh/0.5.10/faq/index.html index 001a35578..7969dee76 100644 --- a/zh/0.5.10/faq/index.html +++ b/zh/0.5.10/faq/index.html @@ -10,13 +10,13 @@ - +
    - + \ No newline at end of file diff --git a/zh/0.5.10/getting-started/cloud/index.html b/zh/0.5.10/getting-started/cloud/index.html index 8c8c72f14..459ea6e8a 100644 --- a/zh/0.5.10/getting-started/cloud/index.html +++ b/zh/0.5.10/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale Cloud入门指南

    Starwhale Cloud运行在阿里云上,域名是 https://cloud.starwhale.cn ,后续我们会推出部署在AWS上的 https://cloud.starwhale.ai 服务,需要注意的是,这是两个相互独立的实例,帐户和数据不共享。您可以选择任何一个开始。

    在开始之前,您需要先安装Starwhale Client(swcli)

    注册Starwhale Cloud并创建您的第一个项目

    您可以直接使用自己的GitHub或微信帐号登录,也可以注册一个新的帐号。如果您使用 GitHub 或 微信帐号登录,系统会要求您提供用户名。

    然后您可以创建一个新项目。在本教程中,我们将使用名称 demo 作为项目名称。

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    登录云实例

    swcli instance login --username <您的用户名> --password <您的密码> --alias swcloud https://cloud.starwhale.cn

    将数据集、模型和运行时复制到Starwhale Cloud

    swcli model copy mnist swcloud/project/demo
    swcli dataset copy mnist swcloud/project/demo
    swcli runtime copy pytorch swcloud/project/demo

    使用 Web UI 运行评估

    console-create-job.gif

    恭喜! 您已完成Starwhale Cloud的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.5.10/getting-started/index.html b/zh/0.5.10/getting-started/index.html index 95741aac8..ecf386d6a 100644 --- a/zh/0.5.10/getting-started/index.html +++ b/zh/0.5.10/getting-started/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    入门指南

    首先,您需要安装Starwhale Client(swcli),可以运行如下命令:

    python3 -m pip install starwhale

    更多详细信息请参阅swcli安装指南

    根据您使用的实例类型,您可以参考以下三个入门指南:

    • Starwhale Standalone入门指南 - 本指南可帮助您在台式PC/笔记本电脑上运行一个MNIST评估。这是开始使用Starwhale最快最简单的方法。
    • Starwhale Server入门指南 - 本指南可帮助您在私有服务器上安装Starwhale Server并运行一个MNIST评估。在本指南结束时,您将拥有一个Starwhale Server实例,您可以在其中管理您的数据集和模型。
    • Starwhale Cloud入门指南 - 本指南可帮助您在Starwhale Cloud上创建帐户并运行MNIST评估。这是体验所有Starwhale功能的最简单方法。
    - + \ No newline at end of file diff --git a/zh/0.5.10/getting-started/runtime/index.html b/zh/0.5.10/getting-started/runtime/index.html index 3954e5cf0..6e2ef2b3b 100644 --- a/zh/0.5.10/getting-started/runtime/index.html +++ b/zh/0.5.10/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale Runtime入门指南

    本文演示如何搭建Pytorch环境的Starwhale Runtime以及如何在不同环境中使用它。该runtime可以满足Starwhale中六个例子的依赖需求:mnist、speech commands、nmt、cifar10、ag_news、PennFudan。相关代码链接:example/runtime/pytorch

    您可以从本教程中学到以下内容:

    • 如何构建Starwhale Runtime。
    • 如何在不同场景下使用Starwhale Runtime。
    • 如何发布Starwhale Runtime。

    前置条件

    基础环境

    运行以下命令以克隆示例代码:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch-cn-mirror #非中国大陆网络可使用pytorch例子

    构建Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    在Standalone Instance中使用Starwhale Runtime

    在shell中使用Starwhale Runtime

    # 激活runtime
    swcli runtime activate pytorch-cn-mirror

    swcli runtime activate会下载runtime的所有python依赖,并在当前shell环境中激活该环境。这个过程可能需要很长时间。

    当runtime被激活时,所有依赖项都已在您的python环境中准备就绪,类似于virtualenv的source venv/bin/activate或者conda的conda activate命令。如果您关闭了shell或切换到另一个shell,则下次使用之前需要重新激活这个runtime。

    在swcli中使用Starwhale Runtime

    # 模型构建中使用runtime
    swcli model build . --runtime pytorch-cn-mirror
    # 数据集构建中使用runtime
    swcli dataset build . --runtime pytorch-cn-mirror
    # 模型评测中使用runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch-cn-mirror

    将 Starwhale Runtime 复制到另一个实例

    您可以将运行时复制到Server/Cloud实例,然后可以在Server/Cloud实例中使用或由其他用户下载。

    # 将runtime复制到名为“pre-k8s”的Server实例
    ❯ swcli runtime copy pytorch-cn-mirror cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/zh/0.5.10/getting-started/server/index.html b/zh/0.5.10/getting-started/server/index.html index e41daf613..1213783d8 100644 --- a/zh/0.5.10/getting-started/server/index.html +++ b/zh/0.5.10/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale Server入门指南

    安装Starwhale Server

    安装 Starwhale Server,参见安装指南

    创建您的第一个项目

    登录服务器

    打开浏览器并在地址栏中输入服务器的 URL。 使用默认用户名(starwhale)和密码(abcd1234)登录。

    console-artifacts.gif

    创建一个新项目

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    将数据集、模型和运行时复制到Starwhale Server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    使用Web UI运行模型评估

    使用浏览器打开“demo”项目并创建一个新的评估。

    console-create-job.gif

    恭喜! 您已完成Starwhale Server的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.5.10/getting-started/standalone/index.html b/zh/0.5.10/getting-started/standalone/index.html index 442bee615..5ee6ad3f8 100644 --- a/zh/0.5.10/getting-started/standalone/index.html +++ b/zh/0.5.10/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale Standalone入门指南

    Starwhale Client(swcli)安装完成后,您就可以使用Starwhale Standalone。

    我们也提供对应的Jupyter Notebook例子,可以在 Google Colab 或本地的 vscode/jupyterlab 中试用。

    下载例子

    通过以下方式克隆Starwhale项目来下载Starwhale示例:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    为了节省例子的下载时间,我们执行git clone命令时,忽略了git-lfs,并只保留最近一次的commit信息。我们选用ML/DL领域的HelloWorld程序-MNIST来介绍如何从零开始构建数据集、模型包和运行环境,并最终完成模型评测。接下来的操作都在 starwhale 目录中进行。

    核心工作流程

    构建 Pytorch 运行时

    运行时示例代码位于example/runtime/pytorch目录中。

    • 构建Starwhale运行时包:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      提示

      当首次构建Starwhale Runtime时,由于需要创建venv或conda隔离环境,并下载相关的Python依赖,命令执行需要花费一段时间。时间长短取决与所在机器的网络情况和runtime.yaml中Python依赖的数量。建议合理设置机器的 ~/.pip/pip.conf 文件,填写缓存路径和适合当前网络环境的pypi mirror地址。

      处于中国大陆网络环境中的用户,可以参考如下配置:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • 检查您本地的Starwhale运行时:

      swcli runtime list
      swcli runtime info pytorch

    构建模型

    模型示例代码位于 example/mnist 目录中。

    • 下载预训练模型文件:

      cd example/mnist
      CN=1 make download-model
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建一个Starwhale模型:

      swcli model build example/mnist --runtime pytorch
    • 检查您本地的Starwhale模型:

      swcli model list
      swcli model info mnist

    构建数据集

    数据集示例代码位于 example/mnist 目录中。

    • 下载MNIST原始数据:

      cd example/mnist
      CN=1 make download-data
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建Starwhale数据集:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • 检查您本地的Starwhale数据集:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    运行评估作业

    • 创建评估工作

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • 检查评估结果

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    恭喜! 您已完成Starwhale Standalone的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.5.10/index.html b/zh/0.5.10/index.html index 58e844d82..2183d0bef 100644 --- a/zh/0.5.10/index.html +++ b/zh/0.5.10/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    什么是Starwhale

    概述

    Starwhale是一个 MLOps/LLMOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    Starwhale能够帮助您:

    • 跟踪您的训练/测试数据集历史记录,包括所有数据项及其相关标签,以便您轻松访问它们。
    • 管理您可以在团队中共享的模型包。
    • 在不同的环境中运行您的模型,无论是在 Nvidia GPU服务器上还是在嵌入式设备(如 Raspberry Pi)上。
    • 为您的模型快速创建配备交互式 Web UI的在线服务。

    同时,Starwhale 是一个开放的平台,您可以创建插件来满足自己的需求。

    部署选项

    Starwhale的每个部署称为一个实例。所有实例都可以通过Starwhale Client(swcli)进行管理。

    您可以任选以下实例类型之一开始使用:

    • Starwhale Standalone - Starwhale Standalone 本质上是一套存储在本地文件系统中的数据库。它由 Starwhale Client(swcli)创建和管理。您只需安装 swcli 即可使用。目前,一台机器上的每个用户只能拥有一个Starwhale Standalone 实例。我们建议您使用 Starwhale Standalone 来构建和测试您的数据集和模型,然后再将它们推送到 Starwhale Server/Cloud 实例。
    • Starwhale Server - Starwhale Server 是部署在您本地服务器上的服务。除了 Starwhale Client(swcli)的文本交互界面,Starwhale Server还提供 Web UI供您管理数据集和模型,以及在Kubernetes集群中运行模型并查看运行结果。
    • Starwhale Cloud - Starwhale Cloud 是托管在公共云上的服务。 通过在https://cloud.starwhale.cn注册一个账号,您就可以使用Starwhale,而无需安装、运行和维护您自己的实例。 Starwhale Cloud 还提供公共资源供您下载,例如一些流行的开源数据集、模型和运行时。查看 Starwhale Cloud 实例上的 “starwhale/public”项目以获取更多详细信息。

    在您决定要使用的实例类型时,请考虑以下因素:

    实例类型部署位置维护者用户界面可扩展性
    Starwhale Standalone您的笔记本电脑或本地服务器不需要命令行不可扩展
    Starwhale Server您的数据中心您自己Web UI和命令行可扩展,取决于您的 Kubernetes 集群
    Starwhale Cloud公共云,如AWS或阿里云Starwhale团队Web UI和命令行可扩展,但目前受到云上免费可用资源的限制
    - + \ No newline at end of file diff --git a/zh/0.5.10/model/index.html b/zh/0.5.10/model/index.html index 5a206b4df..b28a684df 100644 --- a/zh/0.5.10/model/index.html +++ b/zh/0.5.10/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 模型

    Starwhale 模型是一种机器学习模型的标准包格式,可用于多种用途,例如模型微调、模型评估和在线服务。 Starwhale 模型包含模型文件、推理代码、配置文件等等。

    创建一个 Starwhale 模型

    创建 Starwhale 模型有两种方法:通过 swcli 或通过 SDK

    使用 swcli 创建 Starwhale 模型

    使用 swcli 创建 Starwhale 模型之前,您可以定义一个model.yaml,其中描述了关于Starwhale模型的一些必要信息,然后运行以下命令:

    swcli model build . --model-yaml /path/to/model.yaml

    有关该命令和 model.yaml 的更多信息,请参阅swcli参考。需要注意的是,model.yaml 是非必要的。

    使用 Python SDK 创建 Starwhale 模型

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    管理 Starwhale 模型

    使用 swcli 管理 Starwhale 模型

    命令说明
    swcli model list列出项目中所有Starwhale模型
    swcli model info显示有关Starwhale模型的详细信息
    swcli model copy将Starwhale模型复制到另一个位置
    swcli model remove删除Starwhale模型
    swcli model recover恢复之前删除的Starwhale模型

    使用 Web 界面管理 Starwhale 模型

    管理 Starwhale 模型的历史版本

    Starwhale 模型是版本化的。关于版本的基本信息可以参考 Starwhale中的资源版本控制

    使用 swcli 管理 Starwhale 模型的历史版本

    命令说明
    swcli model history列出Starwhale模型的所有版本
    swcli model info显示某个Starwhale模型版本的详细信息
    swcli model diff比较两个版本的Starwhale模型
    swcli model copy复制某个Starwhale模型版本到新的版本
    swcli model remove删除某个Starwhale模型版本
    swcli model recover恢复以前删除的Starwhale模型版本

    模型评估

    使用swcli进行模型评估

    命令说明
    swcli model run指定某个Starwhale模型进行模型评估

    存储格式

    Starwhale模型是一个打包了原始目录的tar文件。

    - + \ No newline at end of file diff --git a/zh/0.5.10/model/yaml/index.html b/zh/0.5.10/model/yaml/index.html index 2cbb700cd..e05cd446a 100644 --- a/zh/0.5.10/model/yaml/index.html +++ b/zh/0.5.10/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    model.yaml 使用指南

    提示

    model.yaml 对于 swcli model build 构建模型的过程是非必要的。

    Starwhale Model 构建时,若使用 swcli model build 命令,可以通过 --model-yaml 参数指定符合特定格式的yaml文件,简化模型构建的参数指定。

    即使不指定 --model-yaml 参数,swcli model build 也会自动寻找 ${workdir} 目录下的 model.yaml 文件,会提取其中的参数。swcli model build 命令行中指定参数优先级大于 model.yaml 中的等价配置,可以认为 model.yaml 是 build 命令行的配置文件化表述。

    当使用 Python SDK 方式构建 Starwhale 模型时,model.yaml 文件不生效。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Model 的名字,等价于 --name 参数。String
    run.modules模型构建时搜索的Python Modules,可以指定多个模型运行的入口点,格式为 Python 可 Imported 路径。等价于 --module 参数。List[String]
    run.handlerrun.modules的曾用写法,只能指定一个模型运行的入口点,已废弃String
    versionmodel.yaml格式版本,目前仅支持填写 1.0String1.0
    desc模型描述信息,等价于 --desc 参数。String

    使用示例

    name: helloworld
    run:
    modules:
    - src.evaluator
    desc: "example yaml"

    名称为 helloworld 的 Starwhale 模型,搜索 swcli model build {WORKDIR} 命令中 ${WORKDIR} 目录相对的 src/evaluator.py 文件中被 @evaluation.predict、@evaluation.evaluate 和 @handler 修饰的函数, 或继承自 PipelineHandler 的类,这些函数或类会被加入 Starwhale 模型可运行的入口点列表中,在 swcli model run 或 Web UI 运行时,选择对应的入口点(handler)运行模型。

    model.yaml 是非必要的,yaml 中定义参数可以在 swcli 命令行参数中指定。

    swcli model build . --model-yaml model.yaml

    等价于:

    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/sdk/dataset/index.html b/zh/0.5.10/reference/sdk/dataset/index.html index e19ff98a1..47e86c1f5 100644 --- a/zh/0.5.10/reference/sdk/dataset/index.html +++ b/zh/0.5.10/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 数据集 SDK

    dataset

    获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    参数

    • uri: (str 或 Resource, required)
      • Dataset URI 格式的字符串或 Resource 对象。
    • create: (str, optional)
      • 数据集创建模式,包括 auto, empty 和 forbid 三种方式。
        • auto 模式: 如果数据集已经存在,不会自动创建数据集;如果数据集不存在,则自动创建数据集。
        • empty 模式: 如果数据集已经存在,则抛出异常;如果数据集不存在,则自动创建数据集。
        • forbid 模式: 如果数据集已经存在,则不做任何事情;如果数据集不存在,则抛出异常。forbid 模式能确保数据集存在。
      • auto 模式是默认值。
    • readonly: (bool, optional)
      • 对于已经存在的数据集,可以指定 readonly=True 保证数据集以只读方式加载。
      • 默认值为 False

    使用示例

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # return False, "mnist" dataset is not existing.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset 实现 Starwhale 数据集的抽象,能够对Standalone/Server/Cloud 实例上的数据集进行操作。

    from_huggingface

    from_huggingface 是一个 classmethod 方法,能够将 Huggingface 上的数据集转化为 Starwhale 数据集。

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称。
    • repo: (str, required)
      • Huggingface 上的数据集 repo 名称。
    • subset: (str, optional)
      • Huggingface的数据集 subset 名称,如果HF数据集有多个 subsets, 您务必要指定一个 subset。
    • split: (str, optional)
      • Huggingface的数据集中 Split 名称。如果没有指定 split,则数据集中所有的 splits 数据都会被构建。
    • revision: (str, optional)
      • Huggingface的数据集版本,默认是 main,即main分支的最新一次提交。参数接受branch, tag 或 commit hash。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • cache: (bool, optional)
      • 是否使用 Huggingface 的本地缓存。
      • 默认使用缓存。
      • 缓存 = 下载文件缓存 + 本地Huggingface 数据集缓存。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json 是一个 classmethod 方法,能够将 json 字符串转化为 Starwhale 数据集。

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称
    • json_text: (str, required)
      • json 字符串,from_json 函数会序列化该字符串为 Python 对象,然后开始构建 Starwhale 数据集。
    • field_selector: (str, optional)
      • 可以提取 json_text 中特定的 array 结构。
      • 默认从 json 的根提取数据。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder 是一个 classmethod 方法,能够读取指定目录中的 Image/Video/Audio 数据,并将其自动转化为 Starwhale 数据集。该函数支持如下特性:

    • 能够递归的搜索目标目录及子目录
    • 支持三种类型的文件提取:
      • image: 支持 png/jpg/jpeg/webp/svg/apng 图片类型。图片文件会被转化为 Starwhale.Image 类型。
      • video: 支持 mp4/webm/avi 视频类型。视频文件会被转化为 Starwhale.Video 类型。
      • audio: 支持 mp3/wav 音频类型。音频文件会被转化为 Starwhale.Audio 类型。
    • 每个文件对应数据集的一条记录,文件对应的数据集字段名称为 file
    • 若 auto_label=True,则会使用父目录的名称作为该条数据的标签,对应 label 字段。根目录下的文件,则不会被打标签。
    • 若存在与 image/video/audio 同名的 txt 文件,则该文件内容会被作为 caption 字段内容存放到数据集中。
    • 若根目录存在 metadata.csvmetadata.jsonl 文件,则会自动读取文件的内容,并将其通过文件路径名作为关联,存入数据集中,可以用来指定 meta 信息。
      • metadata.csvmetadata.jsonl 文件是互斥的,当都存在的时候,程序会抛出异常。
      • metadata.csvmetadata.jsonl 每行记录中需要包含 file_name 字段,指向对应文件的路径。
      • metadata.csvmetadata.jsonl 对于数据集构建是可选的。
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • folder: (str|Path, required)
      • 文件夹路径
    • kind: (str|DatasetFolderSourceType, required)
      • 数据类型设置,目前支持 image, video 和 audio 三种类型。
      • 会根据设置的 kind 值,在 folder 中递归寻找对应类型的文件。其他类型文件会被忽略掉。
    • name: (str|Resource, optional)
      • 数据集名称。
      • 若不指定,则使用目录名称作为数据集名称。
    • auto_label: (bool, optional)
      • 是否根据父目录的名字自动对每条记录打标签。
      • 默认为 True
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例 ${folder-example}

    • 函数调用示例

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • caption 示例

      folder/dog/1.png
      folder/dog/1.txt

      1.txt 中的内容,会填充到 1.png 所在行中 caption 字段中。

    • metadata.csvmetadata.jsonl 示例

      metadata.csv 内容:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl 内容:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • 自动 label 示例

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

      生成的数据集中包含四条数据,分为 dogcat 两类。

    __iter__

    __iter__ 是一个 method 方法,能够对数据集进行迭代。

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label 和 img 是 mnist数据集中的数据列
    print(item.features.img)

    batch_iter

    batch_iter 是一个 method 方法,能够批量地进行数据集迭代。

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    参数

    • batch_size: (int, optional)
      • batch的大小,默认值为1。
    • drop_not_full: (bool, optional)
      • 最后一组batch数据数量小于 batch_size 时,该组数据是否会被抛弃掉。
      • 默认是不抛弃。

    使用示例

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ 是一个 method 方法,能提供数据集中某些行数据的获取,操作方式类似 Python 的 dict 和 list 类型。

    from starwhale import dataset

    ds = dataset("mock-str-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-int-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ 是一个 method 方法,能提供数据集中行数据的更新,操作方式类似 Python 的 dict 类型。__setitem__ 支持多线程并行插入数据。

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    参数

    • key: (int|str, required)
      • key 即为数据集中每行的 index,类型为 int 或 str,一个数据集中只接受一种类型。
    • value: (DataRow|tuple|dict, required)
      • value 即为数据集中每行的 features,一般建议用 Python 的 dict 类型。

    使用示例

    • 插入数据

    test 数据中插入两条数据,index分别为 testtest2

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • 并行插入数据
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]
    for task in as_completed(tasks):
        task.result()  # 等待所有插入任务完成

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ 是一个 method 方法,用来删除数据集中的某些行数据。

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append 是一个 method 方法,用来向数据集中添加数据,类似 Python list 的 append 函数。

    • 添加 features dict,每行数据自动 index 为 int 类型,从0开始自增。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • 添加 index + features dict,数据集中每行数据的 index 不会被自动处理。

      from dataset import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend 是一个 method 方法,用来向数据集中批量添加数据,类似 Python list 的 extend 函数。

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit 是一个 method 方法,调用 commit 时会将当前缓存中数据 flush 到存储中,并产生一个数据集版本,后续可以用这个版本信息加载相应的数据集内容。

    对于一个数据集,如果添加一些数据后,并没有调用 commit 方法,而是直接调用 close 或退出进程,那么这些数据依旧会写入到数据集中,只是没有生成一个新的版本。

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    参数

    • tags: (List(str), optional)
      • 指定 tags,可以指定多个tag。
    • message: (str, optional)
      • 提交信息,默认为空。
    • force_add_tags: (bool, optional)
      • 当给该版本添加标签时,对于 server/cloud 实例,若标签已经被应用到其他数据集版本时,可以使用 force_add_tags=True 参数强制将标签添加到此版本上,否则会抛出异常。
      • 默认为 False
    • ignore_add_tags_errors: (bool, optional)
      • 忽略添加标签时抛出的异常。
      • 默认为 False

    使用示例

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly 是一个 property 属性,表示数据集是否只读,返回值为 bool 类型。

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version 是一个 property 属性,字符串类型。

    • 当加载一个已经存在的数据集时,返回的是数据集加载的对应版本。
    • 对加载一个不存在的数据集时,返回的是 pending_commit_version

    pending_commit_version

    pending_commit_version 是一个 property 属性,字符串类型,表示待提交的版本信息。当调用 commit 方法后,pending_commit_version 会变成 committed_version

    committed_version

    committed_version 是一个 property 属性,字符串类型,表示已经调用 commit 方法后生成的版本信息。当没有调用 commit 方法时,访问该属性时程序会抛出异常。

    remove

    remove 是一个 method 方法,等价于 swcli dataset remove 命令,能够删除数据集。

    def remove(self, force: bool = False) -> None:

    recover

    recover 是一个 method 方法,等价于 swcli dataset recover 命令,能够对软删除且未GC的数据集进行恢复。

    def recover(self, force: bool = False) -> None:

    summary

    summary 是一个 method 方法,等价于 swcli dataset summary 命令,返回数据集摘要信息。

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history 是一个 method 方法,等价于 swcli dataset history 命令,返回数据集的历史记录。

    def history(self) -> t.List[t.Dict]:

    flush

    flush 是一个 method 方法,能够将内存中暂存的数据刷到持久化存储中。commitclose 方法会自动调用 flush

    close

    close 是一个 method 方法,关闭已经打开的数据集相关链接。Dataset 也实现了 contextmanager,使用 with 语法后可以自动关闭数据集,不需要主动调用 close 方法。

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head 是一个 method 方法,能够显示数据集前n行数据,等价于 swcli dataset head 命令。

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> t.List[DataRow]:

    fetch_one

    fetch_one 是一个 method 方法,获得数据集的第一条记录,相当于 head(n=1)[0]

    list

    list 是一个 classmethod 方法,能够列出项目 URI 下的 Starwhale 数据集,等价于 swcli dataset list 命令。

    @classmethod
    def list(
    cls,
    project_uri: t.Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> t.Tuple[t.List[t.Dict[str, t.Any]], t.Dict[str, t.Any]]:

    copy

    copy 是一个 method 方法,能够复制数据到其他实例上,等价于 swcli dataset copy 命令。

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    参数

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • 从远端复制到本地 Standalone 实例时,可以指定对应的项目 URI。
    • force: (bool, optional)
      • 当目标实例上已经有相同版本的数据集时,是否强制覆盖。
      • 默认不覆盖。
      • 当复制标签到远端 Server/Cloud 实例时,若标签已经被其他版本使用,使用 force=True 参数可以强制变更标签到本版本上。
    • mode: (str, optional)
      • 数据集复制模式,分为 patch 模式 和 overwrite 模式,默认为 patch
      • patch: 使用补丁方式更新数据集,只更新计划变更的行和列,在新生成的版本中仍能读取到未受影响的行和列。
      • overwrite: 使用覆盖方式更新数据集,会将原来的所有行都删除,然后再进行更新,在新生成的版本中读取不到老数据。但请放心,删除的数据依旧可以通过旧版本进行访问。
    • ignore_tags (List[str], optional)
      • 复制数据集时,可以忽略的自定义标签。
      • 默认会复制所有用户自定义标签到其他实例中。
      • 复制标签会忽略 latest 和 ^v\d+$ 内建标签。

    使用示例

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch 是一个 method 方法,能够将 Starwhale 数据集转化为 Pytorch 的 torch.utils.data.Dataset 类型,可以进一步传给 torch.utils.data.DataLoader 进行使用。

    需要注意的是,to_pytorch 函数返回的是 Pytorch 的 IterableDataset

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    参数

    • transform: (callable, optional)
      • 支持用户自定义变换函数,能够按需转化数据类型。
    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。
    • skip_default_transform: (bool, optional)
      • 如果没有设置 transform, 默认状态下会使用 Starwhale 内建的 transform 函数,对数据进行转化,可以通过 skip_default_transform 参数禁用内建数据转化。

    使用示例

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow 是一个 method 方法,能够将 Starwhale 数据集转化为 Tensorflow 的 tensorflow.data.Dataset 类型。

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    参数

    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。

    使用示例

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config 是一个 method 方法,用来设置 Starwhale 数据集中 blob 的相关属性信息。需要在变更数据之前调用。

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    参数

    • volume_size: (int|str, optional)
      • 单个数据集 blob 文件的大小。
      • 默认值为 64MB。
      • 当类型为 int 时,单位为 Bytes。
      • 当类型为 str 时,格式类似 1GB, 64MB。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的大小
      • 默认值为 128个字节。
      • volume_size 一样的类型解析。

    使用示例

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config 是一个 method 方法,用来设置 Starwhale 数据集 loader 的过程参数。

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    参数

    • num_workers: (int, optional)
      • 加载数据集的 worker 数目,默认为2。
    • cache_size: (int, optional)
      • 预加载的数据的数量,默认为20条。
    • field_transformer: (dict, optional)
      • features 字段名称的变换。

    使用示例

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/sdk/evaluation/index.html b/zh/0.5.10/reference/sdk/evaluation/index.html index 943d5ebaf..a9d191d22 100644 --- a/zh/0.5.10/reference/sdk/evaluation/index.html +++ b/zh/0.5.10/reference/sdk/evaluation/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.5.10

    Starwhale 模型评测 SDK

    @evaluation.predict

    @evaluation.predict 是一个修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取本地或远端的数据集,将数据集中的数据以单条或批量的方式,传递给 evaluation.predict 修饰的函数。
    • 通过多副本的设置,实现分布式数据集消费的功能,能以水平扩展的方式缩短模型评测任务的用时。
    • 自动将函数返回值和数据集的输入 features 存储到 results 表中,方便Web UI展示和进一步的 evaluate 阶段使用。
    • 每单条或每批量组数据会调用一次被修饰的函数,完成推理过程。

    控制参数

    • resources: (dict, optional)
      • 定义 predict 每个任务在 Server 实例上运行时所需要的资源,包括 mem、cpu 和 nvidia.com/gpu 三种类型。
        • mem: 单位为 Bytes,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"mem": {"request": 100 * 1024, "limit": 200 * 1024}}
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"mem": 100 * 1024} 等价于 resources={"mem": {"request": 100 * 1024, "limit": 100 * 1024}}
        • cpu: 单位为 CPU 核心数,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"cpu": {"request": 1, "limit": 2}}
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"cpu": 1.5} 等价于 resources={"cpu": {"request": 1.5, "limit": 1.5}}
        • nvidia.com/gpu: 单位为 GPU显卡数,支持 int 类型。
          • nvidia.com/gpu 不支持设置 request 和 limit,仅支持单个数字。
      • 需要注意: resources 参数目前仅在 Server 实例中生效。Cloud 实例,通过在提交评测任务时,选择对应的资源池达到相同的作用。Standalone 实例完全不支持该特性。
    • replicas: (int, optional)
      • predict 运行的副本数。
      • predict 相当于定义了一个 Step, 在该 Step 中有若干等价的 Task,每个 Task 在 Cloud/Server 实例上运行实体是 Pod,在 Standalone 实例上运行实体是 Thread。
      • 当指定多个副本时,这些副本是等价的,它们会共同消费选定的数据集,实现分布式数据集消费的目的,可以理解为某个数据集中的某行数据,只会被一个 predict 副本读取。
      • 默认值为1。
    • batch_size: (int, optional)
      • 批量将数据集中的数据传递进函数中。
      • 默认值为1。
    • fail_on_error: (bool, optional)
      • 当被修饰的函数抛出异常时,是否中断所有模型评测。如果预期某些“异常”数据会导致评测失败,但不想中断整体评测,可以设置 fail_on_error=False
      • 默认为 True
    • auto_log: (bool, optional)
      • 是否自动记录函数返回值和数据集输入 features 到 results 表中。
      • 默认为 True
    • log_mode: (str, optional)
      • auto_log=True 时,可以通过设置 log_mode 参数,定义以 plainpickle 方式记录函数返回值。
      • 默认为 pickle 方式。
    • log_dataset_features: (List[str], optional)
      • auto_log=True 时,可以通过该参数,选择性的记录数据集中的某些 features 。
      • 默认会记录所有的 features 。
    • needs: (List[Callable], optional)
      • 定义该任务运行的前置条件,可以用 needs 语法实现 DAG。
      • needs 接受被 @evaluation.predict, @evaluation.evaluate@handler 修饰的函数。
      • 默认为空,不依赖任何其他任务。

    传入参数

    被修饰的函数,需要定义一些输入参数,用来接受数据集内容等,包含如下模式:

    • 单个 data 参数:

      • data 为 一个类 dict 类型,能够读取到数据集的 features 内容。
      • batch_size=1 或不设置 batch_size 时,可以通过 data['label']data.label 方式读取 label feature。
      • 当设置 batch_size > 1 时,data 为一个 list。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external 参数方式:

      • data 为数据集的features。
      • external 为一个 dict 类型,包含 index, index_with_dataset, dataset_info, context 和 dataset_uri 这些内建属性,可以用来做更细粒度的处理。
        • index: 数据集对应行的 index 信息。
        • index_with_dataset: 适用于多个数据集输入的时候做 index 区分。
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo 对象。
        • context: starwhale.Context 对象。
        • dataset_uri: starwhale.base.uri.resource.Resource 对象。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw 方式:

      • data 为数据集的features。
      • kw 可以读取到 external
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs 方式:

      • args的第一个元素为 data
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs 方式:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args 方式:

      • 此方式无法读取到 external 信息。
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    使用示例

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "mem": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate 是一个修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取 predict 阶段记录到 results 表的数据,并以迭代器的方式传入函数中。
    • evaluate 阶段只会运行一个副本,无法像 predict 阶段一样定义 replicas 参数。

    参数

    • resources: (dict, optional)
      • 与 @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • 与 @evaluation.predict 中的 needs 参数定义保持一致。
      • 绝大多数场景中,会依赖一个 @evaluation.predict 修饰的函数。
    • use_predict_auto_log: (bool, optional)
      • 默认为 True,传入一个能够遍历 predict 结果的迭代器到函数中。

    输入参数

    • 当 use_predict_auto_log=True(默认)时,传入一个能够遍历 predict 结果的迭代器到函数中。
      • 迭代出来的对象为一个字典,包含 outputinput 两个key。
        • outputpredict 阶段函数返回的元素。
        • input 为推理时对应使用的数据集的 features ,为一个字典类型。
    • use_predict_auto_log=False 时,不传入任何参数到函数中。

    使用示例

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    evaluation.log

    evaluation.log 是一个函数,记录某些评测指标到特定表中,之后可以通过 Server/Cloud 实例的 Web 页面中查看相关的表。

    @classmethod
    def log(
    cls, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    参数

    • category: (str, required)
      • 记录的类别,该值会被作为 Starwhale Datastore 的表名的后缀。
      • 一个 category 会对应一张 Starwhale Datastore 的表,这些表会以评测任务ID作为隔离区分,相互不影响。
    • id: (str|int, required)
      • 记录的ID,表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • 字典类型,key-value 方式记录指标。

    使用示例

    from starwhale import evaluation

    evaluation.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation.log("ppl", "1", {"a": "test", "b": 1})

    evaluation.log_summary

    evaluation.log_summary 是一个函数,记录某些指标到 summary 表中,Server/Cloud 实例评测页面显示的就是 summary 表的数据。 每次调用,Starwhale 都会自动以此次评测的唯一ID作为表的行ID进行更新,可以在一次评测过程中多次调用该函数,用来更新不同的列。

    每个项目中有一张 summary 表,所有该项目下的评测任务都会将 summary 信息写入该表中。

    @classmethod
    def log_summary(cls, *args: t.Any, **kw: t.Any) -> None:

    使用示例

    from starwhale import evaluation

    evaluation.log_summary(loss=0.99)
    evaluation.log_summary(loss=0.99, accuracy=0.99)
    evaluation.log_summary({"loss": 0.99, "accuracy": 0.99})

    evaluation.iter

    evaluation.iter 是一个函数,返回一个迭代器,用来迭代式的读取某些模型评测表中的数据。

    @classmethod
    def iter(cls, category: str) -> t.Iterator:

    参数

    • category: (str, required)
      • 与 evaluation.log 函数中的 category 参数含义一致。

    使用示例

    from starwhale import evaluation

    results = [data for data in evaluation.iter("label/0")]

    @handler

    @handler 是一个修饰器,具备如下功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 可以控制副本数。
    • 多个 Handlers 可以通过依赖关系,生成DAG,便于控制执行流程。
    • 可以对外暴露端口,以类似 Web Handler 方式运行。

    @fine_tune, @evaluation.predict 和 @evaluation.evaluate 可以认为是 @handler 在某些特定领域的应用,@handler 是这些修饰器的底层实现。@handler 更为基础和灵活。

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    参数

    • resources: (dict, optional)
      • 与 @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • 与 @evaluation.predict 中的 needs 参数定义保持一致。
    • replicas: (int, optional)
      • 与 @evaluation.predict 中的 replicas 参数定义保持一致。
    • name: (str, optional)
      • 显示 handler 时候用的名字。
      • 若不指定,则用修饰函数的名字。
    • expose: (int, optional)
      • 对外暴露的端口,当运行一个 Web Handler的时候,需要声明暴露的端口。
      • 默认为0,表示不暴露任何端口。
      • 目前只能暴露一个端口。
    • require_dataset: (bool, optional)
      • 定义此 Handler 运行时,是否需要数据集。
      • 如果 require_dataset=True,在 Server/Cloud 实例的 Web 界面创建评测任务的时候,需要让用户强制输入数据集;如果 require_dataset=False,则 Web 界面中不需要用户指定数据集。
      • 默认为 False

    使用示例

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune 是一个修饰器,定义模型训练的微调(fine-tune)过程。

    一些限制和使用建议:

    • fine_tune 只有一个副本。
    • fine_tune 需要有数据集输入。
    • 一般在 fine_tune 开始时,通过 Context.get_runtime_context() 获取数据集。
    • 一般在 fine_tune 结束时,通过 starwhale.model.build 生成微调后的 Starwhale 模型包,该模型包会被自动复制到评测对应的项目中。

    参数

    • resources: (dict, optional)
      • 与 @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • 与 @evaluation.predict 中的 needs 参数定义保持一致。

    使用示例

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    @multi_classification 修饰器使用 sklearn lib 对多分类问题进行结果分析,输出 confusion matrix, roc, auc 等值,并且会写入到 starwhale DataStore 相关表中。 使用的时候需要对所修饰的函数返回值有一定要求,返回 (label, result, probability_matrix) 或 (label, result)。

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    参数

    • confusion_matrix_normalize: (str, optional)
      • 接收三种参数:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • 是否计算hamming loss。
      • 默认为 True
    • show_cohen_kappa_score: (bool, optional)
      • 是否计算 cohen kappa score。
      • 默认为 True
    • show_roc_auc: (bool, optional)
      • 是否计算roc/auc, 计算的时候,需要函数返回(label,result, probability_matrix) 三元组,否则只需返回(label, result) 两元组即可。
      • 默认为 True
    • all_labels: (List, optional)
      • 定义所有的Labels。

    使用示例


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    PipelineHandler 是一个类,提供默认的模型评测过程定义,需要用户实现 predictevaluate 函数。

    PipelineHandler 等价于 @evaluation.predict + @evaluation.evaluate,展示使用方式不一样,背后的模型评测过程一致。PipelineHandler 目前不支持 resources 参数的定义

    用户需要实现如下函数:

    • predict: 定义推理过程,等价于 @evaluation.predict 修饰的函数。
    • evaluate: 定义评测过程,等价于 @evaluation.evaluate 修饰的函数。
    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    参数

    • predict_batch_size: (int, optional)
      • 等价于 @evaluation.predict 中的 batch_size 参数。
      • 默认值为1。
    • ignore_error: (bool, optional)
      • 等价于 @evaluation.predict 中的 fail_on_error 参数。
      • 默认值为 False
    • predict_auto_log: (bool, optional)
      • 等价于 @evaluation.predict 中的 auto_log 参数。
      • 默认值为 True
    • predict_log_mode: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_mode 参数。
      • 默认值为 pickle
    • predict_log_dataset_features: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_dataset_features 参数。
      • 默认值为空,会记录所有 features。

    使用示例

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    执行模型评测过程中传入的上下文信息,包括Project、Task ID等。Context 的内容是自动注入的,可以通过如下方式使用:

    • 继承 PipelineHandler 类内使用 self.context 对象。
    • 通过 Context.get_runtime_context() 获取。

    需要注意,只有在模型评测过程中,才能使用Context,否则程序会抛出异常。

    目前Context可以获得如下值:

    • project: str
      • Project 名字。
    • version: str
      • 模型评测的唯一ID。
    • step: str
      • Step 名字。
    • total: int
      • Step 下所有 Task 的数量。
    • index: int
      • Task 索引标号,下标从0开始。
    • dataset_uris: List[str]
      • Starwhale 数据集的URI 列表。

    使用示例


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api 是一个修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,当用户使用 swcli model serve 命令启动 Web Service 接收外部请求,并将推理结果返回给用户,实现在线评测。

    使用示例

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    如果希望自定义 web service 的实现, 可以继承 Service 并重写 serve 函数即可。

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    说明:

    • 使用 PipelineHandler.add_api 函数添加的 handler 和 api 以及实例化的 Service.api decorator 添加的 handler 可以同时生效
    • 如果使用自定义的 Service, 需要在 model 中实例化自定义的 Service 类

    自定义 Request 和 Response

    Request 和 Response 分别是用于接收用户请求和返回给用户结果的处理类, 可以简单的理解成是 handler 的前处理和后处理逻辑

    Starwhale 将支持 Dataset 内置类型的 Request 实现以及 Json Response 的实现, 同时用户可以自定义处理逻辑来使用, 自定义的示例如下:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req


    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/sdk/model/index.html b/zh/0.5.10/reference/sdk/model/index.html index 24deb71e9..c4b09d318 100644 --- a/zh/0.5.10/reference/sdk/model/index.html +++ b/zh/0.5.10/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 模型 SDK

    model.build

    model.build 是一个函数,能够构建 Starwhale 模型,等价于 swcli model build 命令。

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    参数

    • modules: (List[str|object], optional)
      • 构建时导入的模块,为列表类型,可以指定多个模块。
      • 模块类型包含两种:
        • 字符串类型: Python 可 Import 的路径,比如 "to.path.module", "to.path.module:object" 。
        • Python 对象: model.build 函数会自动解析所对应的模块。
      • 如果不指定,则会搜索当前已经导入的模块。
    • name: (str, optional)
      • Starwhale 模型的名称。
      • 若不指定,则会使用 cwd 目录名作为 Starwhale 模型的名称。
    • workdir: (str, Pathlib.Path, optional)
      • Starwhale 模型打包的根目录,此目录下的文件会被打包。
    • project_uri: (str, optional)
      • Project URI,表示该模型属于哪个项目。
      • 默认为 swcli project select 选择的项目。
    • desc: (str, optional)
      • 描述信息,默认为空。
    • remote_project_uri: (str, optional)
      • 其他实例的项目 URI,构建完 Starwhale 模型后,会被自动复制到远端实例中。
    • add_all: (bool, optional)
      • Starwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
      • 默认为 False
    • tags: (List[str], optional)
      • 用户自定义标签。
      • 不能指定 latest 和 ^v\d+$ 这两个 Starwhale 系统内建标签。

    使用示例

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/sdk/other/index.html b/zh/0.5.10/reference/sdk/other/index.html index d9b27bf98..bfb4328eb 100644 --- a/zh/0.5.10/reference/sdk/other/index.html +++ b/zh/0.5.10/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    其他 SDK

    __version__

    Starwhale Python SDK 和 swcli 版本,是字符串常量。

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    init_logger 用来设置日志输出级别。默认为0

    • 0: 输出 errors 信息,traceback 呈现最近的1个堆栈。
    • 1: 输出 errors + warnings 信息,traceback 呈现最近的5个堆栈内容。
    • 2: 输出 errors + warnings + info 信息,traceback 呈现最多10个堆栈内容。
    • 3: 输出 errors + warnings + info + debug 信息,traceback 呈现最多100个堆栈内容。
    • >=4: 输出 errors + warnings + info + debug + trace 信息,traceback 呈现最多1000个堆栈内容。
    def init_logger(verbose: int = 0) -> None:

    login

    登录 server/cloud 实例,等价于 swcli instance login 命令。登录 Standalone 实例是无意义的。

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    参数

    • instance: (str, required)
      • server/cloud 实例的 http url。
    • alias: (str, optional)
      • 实例的别名,可以简化 Starwhale URI 中 instance部分。
      • 若不指定,则使用实例的 http url 中 hostname 部分。
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • username + password 或 token 只能选择一种方式登录实例。

    使用示例

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    登出 server/cloud 实例, 等价于 swcli instance logout 命令。登出 Standalone 实例是无意义的。

    def logout(instance: str) -> None:

    使用示例

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/sdk/overview/index.html b/zh/0.5.10/reference/sdk/overview/index.html index 14ae86265..5d311f430 100644 --- a/zh/0.5.10/reference/sdk/overview/index.html +++ b/zh/0.5.10/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Python SDK 概览

    Starwhale 提供一系列的 Python SDK,帮助用户管理数据集、模型和评测等调用,使用 Starwhale Python SDK 能让您更好的完成 ML/DL 开发任务。

    • class PipelineHandler: 提供默认的模型评测过程定义,需要用户实现 predictevaluate 函数。
    • class Context: 执行模型评测过程中传入的上下文信息,包括 Project、Task ID 等。
    • class Dataset: Starwhale 数据集类。
    • class starwhale.api.service.Service: 在线评测的基础类。

    函数

    • @multi_classification: 修饰器,适用于多分类问题,用来简化 evaluate 结果的进一步计算和结果存储,能更好的呈现评测结果。
    • @handler: 修饰器,定义一个带有资源属性(mem/cpu/gpu)的运行实体,可以控制副本数。多个Handlers可以通过依赖关系,生成DAG,便于控制执行流程。
    • @evaluation.predict: 修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段。
    • @evaluation.evaluate: 修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段。
    • evaluation.log: 记录某些评测指标到特定表中。
    • evaluation.log_summary: 记录某些指标到 summary 表中。
    • evaluation.iter: 迭代读取某些表中的数据。
    • model.build: 进行 Starwhale 模型构建。
    • @fine_tune: 修饰器,定义模型训练的微调(fine-tune)过程。
    • init_logger: 设置日志输出级别,实现五种级别日志输出。
    • dataset: 获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。
    • @starwhale.api.service.api: 修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,实现在线评测。
    • login: 登录 server/cloud 实例。
    • logout: 登出 server/cloud 实例。

    数据类型

    • COCOObjectAnnotation: 提供COCO类型的定义。
    • BoundingBox: 边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。
    • ClassLabel: 描述label的数量和类型。
    • Image: 图片类型。
    • GrayscaleImage: 灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。
    • Audio: 音频类型。
    • Video: 视频类型。
    • Text: 文本类型,默认为 utf-8 格式,用来存储大文本。
    • Binary: 二进制类型,用 bytes 存储,用来存储比较大的二进制内容。
    • Line: 直线类型。
    • Point: 点类型。
    • Polygon: 多边形类型。
    • Link: Link类型,用来制作 remote-link 类型的数据。
    • S3LinkAuth: 当数据存储在基于S3协议的对象存储上时,该类型负责描述授权、密钥信息。
    • MIMEType: 描述 Starwhale 支持的多媒体类型,用在 ImageVideo 等类型的 mime_type 属性上,能更好的进行 Dataset Viewer。
    • LinkType: 描述 Starwhale 支持的remote-link类型,目前支持 LocalFSS3 两种类型。

    其他

    • __version__: Starwhale Python SDK 和 swcli 版本,是字符串常量。

    进一步阅读建议

    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/sdk/type/index.html b/zh/0.5.10/reference/sdk/type/index.html index ab8175a47..c5be40fc2 100644 --- a/zh/0.5.10/reference/sdk/type/index.html +++ b/zh/0.5.10/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 数据类型 SDK

    COCOObjectAnnotation

    提供COCO类型的定义。

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    参数说明
    idobject id,一般为全局object的递增id
    image_idimage id,一般为图片id
    category_idcategory id,一般为目标检测中类别的id
    segmentation物体轮廓表示,Polygon(多边形的点)或RLE格式
    areaobject面积
    bbox表示bounding box,可以为BoundingBox类型或float的列表
    iscrowd0表示是一个单独的object,1表示两个没有分开的object

    使用示例

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    提供灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width和Height,channel默认为1
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage函数

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    提供边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    参数说明
    xleft_x的坐标
    ytop_y的坐标
    width图片的宽度
    height图片的高度

    ClassLabel

    描述label的数量和类型。

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    图片类型。

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width、Height和channel
    mime_typeMIMEType支持的类型
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image函数

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    视频类型。

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp视频的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    音频类型。

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp音频文件的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio函数

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    文本类型,默认为 utf-8 格式。

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    参数说明
    contenttext内容
    encodingtext的编码格式

    使用示例

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text函数

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    二进制类型,用bytes存储。

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    参数说明
    fp路径、IO对象或文件内容的bytes
    mime_typeMIMEType支持的类型

    Binary函数

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link类型,用来制作 remote-link 类型的数据集。

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    参数说明
    uri原始数据的uri地址,目前支持localFS和S3两种协议
    authLink Auth信息
    offset数据相对uri指向的文件偏移量
    size数据大小
    data_typeLink指向的实际数据类型,目前支持 Binary, Image, Text, Audio, Video 类型

    Link函数

    Link.astype

    astype() -> Dict[str, t.Any]

    S3LinkAuth

    当数据存储在基于S3协议的对象存储上时,该类型负责描述授权、密钥信息。

    S3LinkAuth(
    name: str = "",
    access_key: str = "",
    secret: str = "",
    endpoint: str = "",
    region: str = "local",
    )
    参数说明
    nameAuth的名称
    access_keyS3连接中的access_key
    secretS3连接中的secret
    endpointS3连接中的endpoint地址
    regionbucket所在的S3 region,默认为local

    使用示例

    import struct
    import typing as t
    from pathlib import Path

    from starwhale import (
    Link,
    S3LinkAuth,
    GrayscaleImage,
    UserRawBuildExecutor,
    )
    class LinkRawDatasetProcessExecutor(UserRawBuildExecutor):
    _auth = S3LinkAuth(name="mnist", access_key="minioadmin", secret="minioadmin")
    _endpoint = "10.131.0.1:9000"
    _bucket = "users"

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "t10k-labels-idx1-ubyte").open("rb") as label_file:
    _, label_number = struct.unpack(">II", label_file.read(8))

    offset = 16
    image_size = 28 * 28

    uri = f"s3://{self._endpoint}/{self._bucket}/dataset/mnist/t10k-images-idx3-ubyte"
    for i in range(label_number):
    _data = Link(
    f"{uri}",
    self._auth,
    offset=offset,
    size=image_size,
    data_type=GrayscaleImage(display_name=f"{i}", shape=(28, 28, 1)),
    )
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield _data, {"label": _label}
    offset += image_size

    MIMEType

    描述Starwhale支持的多媒体类型,用Python Enum类型实现,用在 Image、Video 等类型的 mime_type 属性上,能更好地支持 Dataset Viewer 展示。

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    LinkType

    描述Starwhale支持的remote-link类型,用Python Enum类型实现,目前支持 LocalFS、S3 两种类型。

    class LinkType(Enum):
    LocalFS = "local_fs"
    S3 = "s3"
    UNDEFINED = "undefined"

    Line

    描述直线。

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    描述点。

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    描述多边形。

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/dataset/index.html b/zh/0.5.10/reference/swcli/dataset/index.html index 1e9fe4401..d085c7e83 100644 --- a/zh/0.5.10/reference/swcli/dataset/index.html +++ b/zh/0.5.10/reference/swcli/dataset/index.html @@ -10,7 +10,7 @@ - + @@ -21,7 +21,7 @@ | --page | N | Integer | 1 | 起始页码,仅限Server和Cloud实例。 | | --size | N | Integer | 20 | 一页中的数据集数量,仅限Server和Cloud实例。 | | --filter-fl | N | String | | 仅显示符合条件的数据集。该选项可以在一个命令中被多次重复使用。 |

    过滤器类型说明范例
    nameKey-Value数据集名称前缀--filter name=mnist
    ownerKey-Value数据集所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli dataset recover

    swcli [全局选项] dataset recover [选项] <DATASET>

    dataset recover 恢复以前删除的Starwhale数据集或版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 数据集或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale数据集或版本会被强制覆盖。

    swcli dataset remove

    swcli [全局选项] dataset remove [选项] <DATASET>

    dataset remove 删除指定的 Starwhale 数据集或某个版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则删除指定数据集的所有版本。软删除的 Starwhale 数据集,可以通过 swcli dataset recover 命令进行恢复(未进行垃圾回收)。

    被删除的Starwhale数据集或版本可以通过 swcli dataset list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale数据集或版本。删除后不可恢复。

    swcli dataset summary

    swcli [全局选项] dataset summary <DATASET>

    显示数据集摘要信息。DATASET 是一个数据集URI

    swcli dataset tag

    swcli [全局选项] dataset tag [选项] <DATASET> [TAGS]...

    dataset tag 将标签附加到指定的Starwhale数据集版本,同时支持删除和列出所有标签的功能。可以在数据集URI中使用标签替代版本ID。

    DATASET是一个数据集URI

    每个数据集版本可以包含任意数量的标签,但同一数据集中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的数据集已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    数据集标签的例子

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist -t t1 -t t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest -t t1 --force-add
    swcli dataset tag mnist -t t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r -t t1 -t t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove -t t1
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/index.html b/zh/0.5.10/reference/swcli/index.html index 99296689c..2c902bd47 100644 --- a/zh/0.5.10/reference/swcli/index.html +++ b/zh/0.5.10/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    概述

    使用方式

    swcli [选项] <COMMAND> [参数]...
    备注

    swcli、sw、starwhale 三个命令的作用是一样的。

    全局选项

    选项说明
    --version显示swcli的版本信息。
    --verbose-v日志中输出更多信息,当 -v 参数越多,呈现信息越多,最多支持4个 -v 参数。
    --help输出命令帮助信息。
    警告

    需要注意的是,全局参数需要跟在swcli之后,命令之前。

    命令

    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/instance/index.html b/zh/0.5.10/reference/swcli/instance/index.html index 4b8e8f307..20fd1a79c 100644 --- a/zh/0.5.10/reference/swcli/instance/index.html +++ b/zh/0.5.10/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    swcli instance

    概述

    swcli [全局选项] instance [选项] <SUBCOMMAND> [参数]

    instance命令包括以下子命令:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [全局选项] instance info [选项] <INSTANCE>

    instance info 输出指定 Starwhale 实例的详细信息。

    INSTANCE 是一个实例URI

    swcli instance list

    swcli [全局选项] instance list [选项]

    instance list 显示所有的 Starwhale 实例。

    swcli instance login

    swcli [全局选项] instance login [选项] <INSTANCE>

    instance login 连接到一个 Server/Cloud 实例并将它设置为默认实例.

    INSTANCE 是一个实例URI

    选项必填项类型默认值说明
    --usernameNString登录用户名
    --passwordNString登录密码
    --tokenNString登录令牌
    --aliasYString实例别名。您可以在任何需要实例URI的地方使用对应的别名替代。

    --username--password 不能和 --token 一起使用。

    swcli instance logout

    swcli [全局选项] instance logout [INSTANCE]

    instance logout 断开和 Server/Cloud 实例的连接并清除本地保存的信息。

    INSTANCE是一个实例URI。如果不指定,将使用默认实例

    swcli instance use

    swcli [全局选项] instance use <INSTANCE>

    instance use 将指定的实例设置为默认实例.

    INSTANCE 是一个实例URI

    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/job/index.html b/zh/0.5.10/reference/swcli/job/index.html index d31b288b6..87b1f3a81 100644 --- a/zh/0.5.10/reference/swcli/job/index.html +++ b/zh/0.5.10/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    swcli job

    概述

    swcli [全局选项] job [选项] <子命令> [参数]...

    job命令包括以下子命令:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [全局选项] job cancel [选项] <JOB>

    job cancel 停止指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job info

    swcli [全局选项] job info [选项] <JOB>

    job info 输出指定作业的详细信息。

    JOB 是一个作业URI

    选项必填项类型默认值说明
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的作业数。仅限 Server 和 Cloud 实例。

    swcli job list

    swcli [全局选项] job list [选项]

    job list显示所有的 Starwhale 作业。

    选项必填项类型默认值说明
    --projectNString要查看的项目的 URI。如果未指定此选项,则使用默认项目替代。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的作业。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的作业数。仅限 Server 和 Cloud 实例。

    swcli job pause

    swcli [全局选项] job pause [选项] <JOB>

    job pause 暂停指定的作业. 被暂停的作业可以使用 job resume 恢复。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    pausecancel 功能上基本相同。它们的差别在于被暂停的作业会保留作业ID,在恢复时继续使用。作业的开发者需要定期保存作业数据并在恢复的时候重新加载相关数据。作业ID 可以用作保存数据的键值。

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job resume

    swcli [全局选项] job resume [选项] <JOB>

    job resume 恢复指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/model/index.html b/zh/0.5.10/reference/swcli/model/index.html index efbd08d70..55c473fa5 100644 --- a/zh/0.5.10/reference/swcli/model/index.html +++ b/zh/0.5.10/reference/swcli/model/index.html @@ -10,14 +10,14 @@ - +
    版本:0.5.10

    swcli model

    概述

    swcli [全局选项] model [选项] <SUBCOMMAND> [参数]...

    model命令包括以下子命令:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [全局选项] model build [选项] <WORKDIR>

    model build 会将整个 WORKDIR 打包到Starwhale模型中,.swignore匹配的文件除外。

    model build 会导入 --module 参数指定的模块,然后生成运行模型所需要的配置。如果您指定的模块依赖第三方库,我们强烈建议您使用 --runtime 选项。如果不指定该选项,您需要确保 swcli 所使用的 Python 环境已经安装了相关的依赖库。

    选项必填项类型默认值说明
    --project-pNString默认项目项目URI
    --model-yaml-fNString${workdir}/model.yamlmodel.yaml 文件路径,默认会尝试使用 ${workdir}/model.yaml 文件。model.yaml 对于模型构建并非必需的。
    --module-mNString构建时导入的模块。Starwhale 会将这些模块中包含的 handler 导出到模型包。该参数可以指定多次,用来导入多个 Python 模块。
    --runtimeNString运行此命令时使用的 Starwhale Runtime的URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --name-nNString模型包的名字
    --desc-dNString模型包的描述
    --package-runtime--no-package-runtimeNBooleanTrue当使用 --runtime 参数时,默认情况下,会将对应的 Starwhale 运行时变成 Starwhale 模型的内置运行时。可以通过 --no-package-runtime 参数禁用该特性。
    --add-allNBooleanFalseStarwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
    -t--tagN全局String

    Starwhale 模型构建的例子

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [全局选项] model copy [选项] <SRC> <DEST>

    model copy 将模型从 SRC 复制到 DEST,用来实现不同实例的模型分享。这里 SRCDEST 都是模型URI

    Starwhale 模型复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 模型复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [全局选项] model diff [选项] <MODEL VERSION> <MODEL VERSION>

    model diff 比较同一模型的两个版本之间的差异。

    MODEL VERSION 是一个模型URI

    选项必填项类型默认值说明
    --show-detailsNBooleanFalse使用该选项输出详细的差异信息。

    swcli model extract

    swcli [全局选项] model extract [选项] <MODEL> <TARGET_DIR>

    model extract 能够对将Starwhale 模型解压到指定目录中,方便进行后续改造。

    MODEL 是一个模型URI

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,会强制覆盖目标目录已经存在的模型解压文件。

    Starwhale 模型解压的例子

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [全局选项] model history [选项] <MODEL>

    model history输出指定Starwhale模型的所有历史版本。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli model info

    swcli [全局选项] model info [选项] <MODEL>

    model info输出指定Starwhale模型版本的详细信息。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basic设置输出的过滤规则,比如只显示Model的model.yaml。目前该参数仅对Standalone Instance的Model生效。

    Starwhale 模型信息查看的例子

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [全局选项] model list [选项]

    model list显示所有的Starwhale模型。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的模型。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的模型数。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的模型。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value模型名称前缀--filter name=mnist
    ownerKey-Value模型所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli model recover

    swcli [全局选项] model recover [选项] <MODEL>

    model recover 恢复以前删除的 Starwhale 模型或版本。

    MODEL是一个模型URI。如果 URI 不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 模型或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的 Starwhale 模型或版本会被强制覆盖。

    swcli model remove

    swcli [全局选项] model remove [选项] <MODEL>

    model remove 删除指定的 Starwhale 模型或某个版本。

    MODEL 是一个模型URI。如果URI不包含版本,则删除指定模型的所有版本。

    被删除的 Starwhale 模型或版本可以在垃圾回收之前通过 swcli model recover 恢复。要永久删除某个Starwhale模型或版本,您可以使用 --force 选项。

    被删除的 Starwhale 模型或版本可以通过 swcli model list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale模型或版本。删除后不可恢复。

    swcli model run

    swcli [全局选项] model run [选项]

    model run 运行一个模型的 Handler。该命令提供两种模式: model URI模式和本地开发模式。 model URI模式需要一个预先构建好的模型包,本地开发模式仅需要 model 代码目录即可。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在model URI模式中使用,指定 model URI。
    --handler-hNString运行的Handler索引或名字,默认运行第一个Handler。格式为序号或Handler的名字。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model run 是非必须的。
    --run-project-pNString默认的 ProjectProject URI,表示 model run 的结果存储到对应的项目中。
    --dataset-dNStringDataset URI,模型运行所需要的 Starwhale 数据集。该参数可以被设置多次。
    --in-containerNBooleanFalse使用docker镜像来运行模型。此选项仅适用于 Standalone 实例。Server 和 Cloud 实例始终使用 docker 镜像。如果指定的 runtime 是基于 docker 镜像构建的,此选项总是为真。
    --forbid-snapshot-fsNBooleanFalse当在model URI模式下,每次模型运行,都会使用一个全新的快照目录,设置该参数后直接使用模型的 workdir 目录作为运行目录。本地开发模式下,此参数不生效,每次运行都是在 --workdir 指定的目录中。

    Starwhale 模型运行的例子

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp

    swcli model serve

    swcli [全局选项] model serve [选项]

    model serve 命令可以以Web Server方式运行模型,并提供简易的 Web 交互界面。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在 model URI模式中使用,指定 model URI。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model serve 是非必须的。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --hostNString127.0.0.1服务监听的地址
    --portNInteger8080服务监听的端口

    Starwhale 模型 Serving 的例子

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [全局选项] model tag [选项] <MODEL> [TAGS]...

    model tag将标签附加到指定的Starwhale模型版本,同时支持删除和列出所有标签的功能。可以在模型URI中使用标签替代版本ID。

    MODEL是一个模型URI

    每个模型版本可以包含任意数量的标签,但同一模型中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的模型已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 模型标签的例子

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist -t t1 -t t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest -t t1 --force-add
    swcli model tag mnist -t t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r -t t1 -t t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove -t t1
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/project/index.html b/zh/0.5.10/reference/swcli/project/index.html index 19f857758..a365bc03a 100644 --- a/zh/0.5.10/reference/swcli/project/index.html +++ b/zh/0.5.10/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    swcli project

    Overview

    swcli [全局选项] project [选项] <子命令> [参数]...

    project命令包括以下子命令:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [全局选项] project create <PROJECT>

    project create 创建一个新的项目。

    PROJECT 是一个项目URI

    swcli project info

    swcli [全局选项] project info [选项] <PROJECT>

    project info 输出指定项目的详细信息。

    PROJECT 是一个项目URI

    swcli project list

    swcli [全局选项] project list [选项]

    project list 显示所有的项目。

    选项必填项类型默认值说明
    --instanceNString要显示的实例 URI。如果不指定该选项,则显示默认实例.
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的项目。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的项目数。仅限 Server 和 Cloud 实例。

    swcli project recover

    swcli [全局选项] project recover [选项] <PROJECT>

    project recover 恢复以前删除的项目。

    PROJECT 是一个项目URI

    已经被垃圾回收或者使用 --force 选项删除的项目无法使用本命令恢复。

    swcli project remove

    swcli [全局选项] project remove [选项] <PROJECT>

    project remove 删除指定的项目。

    PROJECT 是一个项目URI

    被删除的项目可以在垃圾回收之前通过 swcli project recover 恢复。要永久删除某个项目,您可以使用 --force 选项。

    被删除的项目可以通过 swcli project list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 模型或版本。删除后不可恢复。

    swcli project use

    swcli [全局选项] project use <PROJECT>

    project use 将指定的项目设置为默认项目。如果要指定 Server/Cloud 实例上的项目,您需要先登录才能运行本命令。

    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/runtime/index.html b/zh/0.5.10/reference/swcli/runtime/index.html index 51e9896f3..de969fe1b 100644 --- a/zh/0.5.10/reference/swcli/runtime/index.html +++ b/zh/0.5.10/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    swcli runtime

    概述

    swcli [全局选项] runtime [选项] <SUBCOMMAND> [参数]...

    runtime 命令包括以下子命令:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [全局选项] runtime activate [选项] <RUNTIME>

    runtime activate 根据指定的运行时创建一个全新的 Python 环境,类似 source venv/bin/activateconda activate xxx 的效果。关闭当前 shell 或切换到其他 shell 后,需要重新激活 Runtime。URI 参数为 Runtime URI。

    对于已经激活的 Starwhale 运行时,如果想要退出该环境,需要在 venv 环境中执行 deactivate 命令或conda环境中执行 conda deactivate 命令。

    runtime activate 命令首次激活环境的时候,会根据 Starwhale 运行时的定义,构建一个 Python 隔离环境,并下载相关的 Python Packages ,可能会花费比较的时间。

    swcli runtime build

    swcli [全局选项] runtime build [选项]

    runtime build 命令可以从多种环境或 runtime.yaml ,构建一个可以分享、可以复现的适合 ML/DL 领域的运行环境。

    参数说明

    • 运行时构建方式的相关参数:
    选项必填项类型默认值说明
    -c--condaNString通过 conda env name 寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -cp--conda-prefixNString通过 conda env prefix 路径寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -v--venvNString通过 venv 目录地址寻找对应的 venv 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -s--shellNString根据当前 shell 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -y--yamlNcwd 目录的 runtime.yaml根据用户自定义的 runtime.yaml 构建 Starwhale 运行时。
    -d--dockerNString将 docker image 作为 Starwhale 运行时。

    运行时构建方式的相关参数是互斥的,只能指定一种方式,如果不指定,则会采用 --yaml 方式读取 cwd 目录下的 runtime.yaml 文件进行 Starwhale 运行时的构建。

    • 其他参数:
    选项必填项作用域类型默认值说明
    --project-pN全局String默认项目项目URI
    -del--disable-env-lockNruntime.yaml 模式BooleanFalse是否安装 runtime.yaml 中的依赖,并锁定相关依赖的版本信息。默认会锁定依赖。
    -nc--no-cacheNruntime.yaml 模式BooleanFalse是否删除隔离环境,全新安装相关依赖。默认会在之前的隔离环境中安装依赖。
    --cudaNconda/venv/shell 模式Choice[11.3/11.4/11.5/11.6/11.7/]CUDA 版本,默认不使用 CUDA。
    --cudnnNconda/venv/shell 模式Choice[8/]cuDNN 版本,默认不使用 cuDNN。
    --archNconda/venv/shell 模式Choice[amd64/arm64/noarch]noarch体系结构
    -epo--emit-pip-optionsN全局BooleanFalse是否导出 ~/.pip/pip.conf,默认导出。
    -ecc--emit-condarcN全局BooleanFalse是否导出 ~/.condarc,默认导出。
    -t--tagN全局String用户自定义标签,可以指定多次。

    Starwhale 运行时构建的例子

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [全局选项] runtime copy [选项] <SRC> <DEST>

    runtime copy 将 runtime 从 SRC 复制到 DEST,可以实现不同实例之间的运行时分享。这里 SRC 和 DEST 都是运行时URI。

    Starwhale 运行时复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest 和 ^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 运行时复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [全局选项] runtime dockerize [选项] <RUNTIME>

    runtime dockerize 基于指定的 runtime 创建一个 docker 镜像。Starwhale 使用 docker buildx 来创建镜像。运行此命令需要预先安装 Docker 19.03 以上的版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --tag or -tNStringDocker镜像的tag,该选项可以重复多次。
    --pushNBooleanFalse是否将创建的镜像推送到docker registry。
    --platformNStringamd64镜像的运行平台,可以是amd64或者arm64。该选项可以重复多次用于创建多平台镜像。
    --dry-runNBooleanFalse只生成 Dockerfile 不实际生成和推送镜像。

    swcli runtime extract

    swcli [全局选项] runtime extract [选项] <RUNTIME>

    Starwhale 运行时以压缩包的方式分发,使用 runtime extract 命令可以解压运行时 Package,然后进行后续的自定义修改。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果目标目录已经有解压好的 Starwhale 运行时,是否删除后重新解压。
    --target-dirNString自定义解压的目录,如果不指定则会放到 Starwhale 默认的运行时 workdir 目录中,命令输出日志中会提示。

    swcli runtime history

    swcli [全局选项] runtime history [选项] <RUNTIME>

    runtime history输出指定Starwhale运行时的所有历史版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli runtime info

    swcli [全局选项] runtime info [选项] <RUNTIME>

    runtime info输出指定Starwhale运行时版本的详细信息。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/runtime_yaml/manifest/lock/all]basic设置输出的过滤规则,比如只显示Runtime的runtime.yaml。目前该参数仅对Standalone Instance的Runtime生效。

    Starwhale 运行时查看详情的例子

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [全局选项] runtime list [选项]

    runtime list显示所有的 Starwhale 运行时。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的运行时。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的运行时数量。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的运行时。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value运行时名称前缀--filter name=pytorch
    ownerKey-Value运行时所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli runtime recover

    swcli [全局选项] runtime recover [选项] <RUNTIME>

    runtime recover 命令可以恢复以前删除的 Starwhale 运行时。

    RUNTIME是一个运行时URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 运行时或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale运行时或版本会被强制覆盖。

    swcli runtime remove

    swcli [全局选项] runtime remove [选项] <RUNTIME>

    runtime remove 命令可以删除指定的 Starwhale 运行时或某个版本。

    RUNTIME 是一个运行时URI。如果 URI 不包含版本,则删除所有版本。

    被删除的 Starwhale 运行时或版本可以在垃圾回收之前通过 swcli runtime recover 命令恢复。要永久删除某个 Starwhale 运行时或版本,您可以使用 --force 选项。

    被删除的 Starwhale 运行时或版本可以通过 swcli runtime list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 运行时或版本。删除后不可恢复。

    swcli runtime tag

    swcli [全局选项] runtime tag [选项] <RUNTIME> [TAGS]...

    runtime tag 命令将标签附加到指定的 Starwhale 运行时版本,同时支持删除和列出所有标签的功能。可以在运行时URI中使用标签替代版本 ID。

    RUNTIME 是一个运行时URI

    每个运行时版本可以包含任意数量的标签,但同一运行时中不允许有重复的标签名称。

    runtime tag仅适用于 Standalone 实例.

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的运行时已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 运行时标签的例子

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist -t t1 -t t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest -t t1 --force-add
    swcli runtime tag mnist -t t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r -t t1 -t t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove -t t1
    - + \ No newline at end of file diff --git a/zh/0.5.10/reference/swcli/utilities/index.html b/zh/0.5.10/reference/swcli/utilities/index.html index bf6cad02d..58973cadf 100644 --- a/zh/0.5.10/reference/swcli/utilities/index.html +++ b/zh/0.5.10/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    其他命令

    swcli gc

    swcli [全局选项] gc [选项]

    gc根据内部的垃圾回收策略清理已经被删除的项目、模型、数据集和运行时。

    选项必填项类型默认值说明
    --dry-runNBooleanFalse如果为真,仅输出将被删除的对象而不清理。
    --yesNBooleanFalse跳过所有需要确认的项目。

    swcli check

    swcli [全局选项] check

    检查 swcli 命令的外部依赖是否满足条件,目前主要检查 Docker 和 Conda。

    swcli completion install

    swcli [全局选项] completion install <SHELL_NAME>

    安装 swcli 命令补全,目前支持 bash, zsh 和 fish。如果不指定 SHELL_NAME,则尝试主动探测当前shell类型。

    swcli config edit

    swcli [全局选项] config edit

    编辑 Starwhale 配置文件,即 ~/.config/starwhale/config.yaml

    swcli ui

    swcli [全局选项] ui <INSTANCE>

    打开对应实例的Web页面。

    - + \ No newline at end of file diff --git a/zh/0.5.10/runtime/index.html b/zh/0.5.10/runtime/index.html index edddc6294..96ad2befe 100644 --- a/zh/0.5.10/runtime/index.html +++ b/zh/0.5.10/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 运行时

    概览

    Starwhale 运行时能够针对运行Python程序,提供一种可复现、可分享的运行环境。使用 Starwhale 运行时,可以非常容易的与他人分享,并且能在 Starwhale Server 和 Starwhale Cloud 实例上使用 Starwhale 运行时。

    Starwhale 运行时使用 venv, conda 和 docker 等基础技术,如果您当前正在使用这些技术,可以非常容易的将这个环境转化为 Starwhale 运行时。

    对于本地环境,Starwhale 运行时支持非常容易的多种环境管理和切换。Starwhale 运行时包含基础镜像和环境依赖两个部分。

    基础镜像

    Starwhale 基础镜像中会安装 Python, CUDA, cuDNN 和其他一些机器学习开发中必要的基础库。Starwhale 运行时提供多种基础镜像供选择,列表如下:

    • 体系结构:
      • X86 (amd64)
      • Arm (aarch64)
    • 操作系统:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7

    runtime.yaml 通过相关设置来决定使用何种基础镜像。

    - + \ No newline at end of file diff --git a/zh/0.5.10/runtime/yaml/index.html b/zh/0.5.10/runtime/yaml/index.html index 805c9fdcd..fb3371ad6 100644 --- a/zh/0.5.10/runtime/yaml/index.html +++ b/zh/0.5.10/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    runtime.yaml 使用指南

    runtime.yaml 是构建 Starwhale 运行时的描述文件,用户可以细粒度的定义 Starwhale 运行时的各种属性。当使用 swcli runtime build 命令中 yaml 模式时,需要提供 runtime.yaml 文件。

    使用示例

    最简示例

    dependencies:
    - pip:
    - numpy
    name: simple-test

    定义一个以 venv 作为Python 包隔离方式,安装numpy依赖的 Starwhale 运行时。

    llama2 示例

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    完整字段示例

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yaml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be install with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/zh/0.5.10/server/guides/server_admin/index.html b/zh/0.5.10/server/guides/server_admin/index.html index e2f9b3a32..a85becdab 100644 --- a/zh/0.5.10/server/guides/server_admin/index.html +++ b/zh/0.5.10/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    版本:0.5.10

    Starwhale Server 系统设置

    超级管理员密码重置

    一旦您忘记了超级管理员的密码, 您可以通过下面的SQL语句将密码重置为 abcd1234

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    重置后,您可以使用上述密码登录到console。 然后再次修改密码为您想要的密码。

    系统设置

    您可以在 Starwhale Server Web 界面中对系统设置进行更改,目前支持runtime的docker镜像源修改以及资源池的划分等。下面是系统设置的一个例子:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    镜像源设置

    Server 下发的 Tasks 都是基于 docker 实现的,Starwhale Server 支持自定义镜像源,包括 dockerSetting.registryForPush 和 dockerSetting.registryForPull。

    资源池设置

    资源池实现了集群机器分组的功能。用户在创建任务时可以通过选择资源池将自己的任务下发到想要的机器组中。资源池可以理解为 Kubernetes 中的 nodeSelector,所以当您在K8S集群中给机器打上标签后,就可以在这里配置您的 resourcePool

    存储设置

    您可以通过存储设置来配置 Starwhale Server 可以访问哪些存储介质:

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # part size in bytes (5 MB) for multipart upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # part size in bytes (5 MB) for multipart upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # part size in bytes (5 MB) for multipart upload

    每一个 storageSetting 条目都应该有一个StorageAccessService接口的实现. Starwhale目前有四个内置的实现:

    • StorageAccessServiceAliyun 可以处理 type 为 aliyun 或者 oss 的条目
    • StorageAccessServiceMinio 可以处理 type 为 minio 的条目
    • StorageAccessServiceS3 可以处理 type 为 s3 的条目
    • StorageAccessServiceFile 可以处理 type 为 fs 或者 file 的条目

    不同的实现对 tokens 的要求是不一样的。当 type 为 aliyun、minio 或者 oss 的时候,endpoint 是必填的。当 endpoint 为空并且 type 为 s3 的时候,region 是必填的。而 fs/file 类型的存储则需要 rootDir 和 serviceProvider 作为 tokens 的 key。更多细节请参阅代码。

    - + \ No newline at end of file diff --git a/zh/0.5.10/server/index.html b/zh/0.5.10/server/index.html index cef8949d5..9c50cd023 100644 --- a/zh/0.5.10/server/index.html +++ b/zh/0.5.10/server/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/0.5.10/server/installation/docker/index.html b/zh/0.5.10/server/installation/docker/index.html index 07eb659ec..61d3305c3 100644 --- a/zh/0.5.10/server/installation/docker/index.html +++ b/zh/0.5.10/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    使用 Docker 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。

    请确保您的Kubernetes集群上的pod可以访问Starwhale Server侦听的端口。

    为Docker准备env文件

    Starwhale Server可以通过环境变量进行配置。

    Docker的env文件模板参考此处。您可以通过修改模板来创建自己的env文件。

    准备kubeconfig文件

    kubeconfig文件用于访问Kubernetes集群。 有关kubeconfig文件的更多信息,请参阅官方Kubernetes文档

    如果您安装了kubectl命令行工具,可以运行 kubectl config view 来查看您当前的配置。

    启动Docker镜像

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<您的kubeconfig文件路径>,destination=/root/.kube/config,readonly \
    --env-file <您的env文件路径> \
    docker-registry.starwhale.cn/star-whale/server:0.5.6

    对于非中国大陆网络用户,可以使用托管在 ghcr.io 上的镜像: ghcr.io/star-whale/server

    - + \ No newline at end of file diff --git a/zh/0.5.10/server/installation/helm-charts/index.html b/zh/0.5.10/server/installation/helm-charts/index.html index 1b02ff929..236317185 100644 --- a/zh/0.5.10/server/installation/helm-charts/index.html +++ b/zh/0.5.10/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    使用 Helm 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。
    • Helm 3.2.0+。

    Starwhale Helm charts 包括 MySQL 和 MinIO 作为依赖项。如果您没有自己的 MySQL 实例或任何与 AWS S3 兼容的对象存储可用,可以通过 Helm Chats 进行安装。请查看下文的安装选项以了解如何在安装 Starwhale Server 的同时安装 MySQL 和 MinIO。

    在 Kubernetes 上为 Starwhale Server 创建一个服务账号

    如果您的 Kubernetes 集群启用了 RBAC(在 Kubernetes 1.6+中,默认启用 RBAC),Starwhale Server 将无法正常工作,除非由至少具有以下权限的服务帐户启动:

    ResourceAPI GroupGetListWatchCreateDelete
    jobsbatchYYYYY
    podscoreYYY
    nodescoreYYY
    events""Y

    例子:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    下载 Starwhale Helm chart

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    安装Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    如果您安装了kubectl命令行工具,您可以运行 kubectl get pods -n starwhale 来检查是否所有 pod 都在正常运行中。

    更新 Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    卸载 Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/zh/0.5.10/server/installation/index.html b/zh/0.5.10/server/installation/index.html index b488e02cf..ed4bc184b 100644 --- a/zh/0.5.10/server/installation/index.html +++ b/zh/0.5.10/server/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale Server 安装指南

    Starwhale Server 以 Docker 镜像的形式发布。您可以直接使用 Docker 运行,也可以部署到 Kubernetes 集群上。

    - + \ No newline at end of file diff --git a/zh/0.5.10/server/installation/minikube/index.html b/zh/0.5.10/server/installation/minikube/index.html index 24f98d116..afc17598e 100644 --- a/zh/0.5.10/server/installation/minikube/index.html +++ b/zh/0.5.10/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    使用 Minikube 安装 Starwhale Server

    先决条件

    启动 Minikube

    minikube start --addons ingress --image-mirror-country=cn --kubernetes-version=1.25.3

    对于非中国大陆网络用户,可以省略 --image-mirror-country=cn 参数。另外,如果在您的机器上没有安装 kubectl,可以使用 Minikube 自带的 kubectl: minikube kubectl 或 bashrc中增加 alias kubectl="minikube kubectl --"

    安装 Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    对于非中国大陆网络用户,可以使用 values.minikube.global.yaml,命令如下:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    当成功安装后,会有类似如下的提示信息输出:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    检查 Starwhale Server 状态

    Minikube 方式启动 Starwhale Server 一般要用时3-5分钟,可以输出如下命令检查是否完成启动:

    kubectl get deployments -n starwhale
    NAMEREADYUP-TO-DATEAVAILABLEAGE
    controller1/1115m
    minio1/1115m
    mysql1/1115m

    本机访问的网络配置

    输出如下命令后,就可以在浏览器中通过 http://controller.starwhale.svc 访问 Starwhale Server:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    其他机器访问的网络配置

    • 步骤1: 在 Starwhale Server 所在机器上

      使用 socat 命令做临时的端口转发,命令如下:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      当您停掉socat进程后,端口转发会被禁止,其他机器的访问也会被禁止。如果想长期开启端口转发,可以使用 iptables 命令。

    • 步骤2: 在其他机器上

      在 hosts 文件添加相关域名映射,命令如下:

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/zh/0.5.10/server/installation/starwhale_env/index.html b/zh/0.5.10/server/installation/starwhale_env/index.html index 4512b0776..510ed1661 100644 --- a/zh/0.5.10/server/installation/starwhale_env/index.html +++ b/zh/0.5.10/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 环境变量文件示例

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The Kubernetes namespace to use when running a task
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################
    - + \ No newline at end of file diff --git a/zh/0.5.10/server/project/index.html b/zh/0.5.10/server/project/index.html index 1ea651aa5..308e00a07 100644 --- a/zh/0.5.10/server/project/index.html +++ b/zh/0.5.10/server/project/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.5.10

    Project Management

    Project type

    There are two types of projects:

    • Public: Visible to anyone. Everyone on the internet can find and see public projects.

    • Private: Visible to users specified in the project member settings. Private projects can only be seen by project owners and project members. The project owner can manage access in the project setting of Manage Member.

    Create a project

    1 Sign in to Starwhale, click Create Project.

    creat

    2 Type a name for the project.

    image

    提示

    Avoid duplicate project names. For more information, see Names in Starwhale

    3 Select project visibility to decide who can find and see the project.

    image

    4 Type a description. It is optional.

    image

    5 To finish, click Submit.

    image

    Edit a project

    The name, privacy and description of a project can be edited.

    提示

    Users with the project owner or maintainer role can edit a project. For more information, see Roles and permissions

    Edit name

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter a new name for the project.

      image

      提示

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

      4 If you're editing multiple projects, repeat steps 1 through 3.

    • If you are on a specific project:

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter a new name for the project.

      image

      提示

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

    Edit privacy

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    Edit description

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    Delete a project

    1 Hover your mouse over the project you want to delete, then click the Delete button.

    image

    2 If you are sure to delete, type the exact name of the project and then click Confirm to delete the project.

    image

    Important: When you delete a project, all the models, datasets, evaluations and runtimes belonging to the project will also be deleted and cannot be restored. Be careful about this action.

    Manage project member

    Only users with the admin role can assign people to the project. The project creator has the project owner role by default.

    Add a member to the project

    1 On the project list page or overview tab, click the Manage Member button, then Add Member.

    image

    image

    2 Type the username you want to add to the project, then click a name in the list of matches.

    image

    3 Select a project role for the member from the drop-down menu. For more information, see Roles and permissions

    image

    4 To finish, click Submit.

    image

    Remove a member

    1 On the project list page or project overview tab, click the Manage Member button.

    image

    2 Find the username you want to remove in the search box, click Remove, then Yes.

    image

    - + \ No newline at end of file diff --git a/zh/0.5.10/swcli/config/index.html b/zh/0.5.10/swcli/config/index.html index 11cbd997b..ee5e2bb3c 100644 --- a/zh/0.5.10/swcli/config/index.html +++ b/zh/0.5.10/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    配置文件

    Standalone Instance 是安装在用户的笔记本或开发服务器上,以Linux/Mac用户为粒度进行隔离。用户通过 pip 命令安装 Starwhale Python package 并执行任意 swcli 命令后,就可以在 ~/.config/starwhale/config.yaml 中查看该用户的 Starwhale 配置。绝大多数情况下用户不需要手工修改config.yaml文件。

    ~/.config/starwhale/config.yaml 文件权限为 0o600,由于里面存有密钥信息,不建议用户修改该文件权限。您可以通过swcli config edit来修改配置:

    swcli config edit

    config.yaml 例子

    典型的 config.yaml 文件内容如下:

    • 当前默认 Instance 为 local。
    • cloud-cn/cloud-k8s/pre-k8s 三个为 Cloud Instance,local 为 Standalone Instance。
    • Standalone 本地存储的根目录为 /home/liutianwei/.starwhale
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml 字段说明

    参数说明类型默认值是否必须
    current_instance默认使用的instance名字,一般用 swcli instance select 命令设置Stringself
    instances管理的 Instances,包括 Standalone, Server 和 Cloud Instance,至少会有 Standalone Instance(名称为local),Server/Cloud Instance有一个或多个,一般用 swcli instance login 登陆一个新的instance,swcli instance logout 退出一个instanceDictStandalone Instance,名称为local
    instances.{instance-alias-name}.sw_token登陆Token,只对Server/Cloud Instance生效,后续swcli对Server/Cloud Instance进行操作时都会使用该Token。需要注意Token有过期时间,默认1个月,可以在Server/Cloud Instance中进行设置StringCloud-是,Standalone-否
    instances.{instance-alias-name}.typeinstance类型,目前只能填写 cloudstandaloneChoice[String]
    instances.{instance-alias-name}.uri对于Server/Cloud Instance,uri是http/https地址,对于Standalone Instance,uri是 localString
    instances.{instance-alias-name}.user_name用户名String
    instances.{instance-alias-name}.current_project当前Instance下默认的Project是什么,在URI的表述中会作为project字段进行默认填充,可以通过 swcli project select 命令进行设置String
    instances.{instance-alias-name}.user_role用户角色Stringnormal
    instances.{instance-alias-name}.updated_at该条Instance配置更新时间时间格式字符串
    storage与本地存储相关的设置Dict
    storage.rootStandalone Instance本地存储的根目录。通常情况下,当home目录空间不足,手工把数据文件移动到其他位置时,可以修改该字段String~/.starwhale
    versionconfig.yaml的版本,目前仅支持2.0String2.0

    Standalone Instance 的文件存储结构

    ${storage.root} 目录中存储了 Standalone Instance 所有的用户数据,包括 Project、Runtime、Model、Dataset、Evaluation 等用户直接感知的数据,也包括 ObjectStore、DataStore 等 Starwhale 后台实现的存储。具体说明如下:

    +-- ${storage.root}
    | +-- .objectstore --> 存储数据集chunk文件的简单存储,使用blake2b hash算法
    | | +-- blake2b --> hash算法名称
    | | | +-- 00 --> hash2位前缀
    | | | | +-- 0019ad58... --> object文件,文件名是文件内容的hash值
    | | | +-- 05
    | +-- .datastore --> 基于pyarrow的列式存储
    | | +-- project
    | | | +-- self --> 按照project名称进行分类存储
    | | | | +-- dataset --> 数据集相关的datastore存储,一般用来存储数据集的索引信息
    | | | | +-- eval --> 模型评测结果存储
    | +-- .recover --> 软删除某个project的存储目录,可以用 `swcli project recover` 进行恢复
    | +-- .tmp --> Dataset/Model/Runtime 构建过程中临时目录
    | +-- myproject --> 用户创建的project,所有myproject信息都存储在该目录
    | +-- self --> Standalone Instance自动创建的project
    | | +-- dataset --> swds数据集存储目录
    | | +-- evaluation --> 模型评测配置文件、日志等存储目录
    | | +-- model --> swmp模型包存储目录
    | | +-- runtime --> swrt环境包存储目录
    | | +-- workdir --> 解压、复原包文件的目录
    | | | +-- model --> swmp解压后的目录
    | | | +-- runtime --> swrt解压后的目录,若进行runtime restore操作,生成的venv或conda隔离环境,也会存放在该目录中

    有时候您可能需要用到 starwhale.Link 来存储一些信息。理论上,Link里面的URI可以是任意的合法 URI(Starwhale 目前只支持S3协议族和HTTP),比如s3://10.131.0.1:9000/users/path。然而,有些 Link是需要鉴权才能访问的。 link_auths 就是用来存放这些鉴权信息的。

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    link_auths 里面的每一条都会自动匹配您的URI。 目前 S3 类型的鉴权信息通过 bucketendpoint 来匹配 URI。

    - + \ No newline at end of file diff --git a/zh/0.5.10/swcli/index.html b/zh/0.5.10/swcli/index.html index cda7c350d..7f77e45f9 100644 --- a/zh/0.5.10/swcli/index.html +++ b/zh/0.5.10/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale Client (swcli) 用户指南

    swcli 是一个命令行工具,可让您与 Starwhale 实例进行交互。您可以使用 swcli 完成 Starwhale 中几乎所有的任务。swcli 是用纯 Python3 编写的(需要 Python 3.7 ~ 3.11),因此可以通过 pip 命令轻松安装。目前,swcli 仅支持 Linux 和 macOS,Windows版本即将推出。

    - + \ No newline at end of file diff --git a/zh/0.5.10/swcli/installation/index.html b/zh/0.5.10/swcli/installation/index.html index 7f3ef9496..0f85a0713 100644 --- a/zh/0.5.10/swcli/installation/index.html +++ b/zh/0.5.10/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    安装指南

    swcli 命令行工具能够对各种实例完成几乎所有的操作,由于是由纯 Python3 编写,可以使用 pip 命令完成安装,本文会提供一些安装建议,帮助您获得一个干净的、无依赖冲突的 swcli Python 环境。

    安装建议

    非常不建议将 Starwhale 安装在系统的全局 Python 环境中,可能会导致 Python 的依赖冲突问题。使用 venv 或 conda 创建一个隔离的 Python 环境,并在其中安装 Starwhale,是 Python 推荐的做法。

    先决条件

    • Python3.7 ~ 3.11
    • Linux 或 macOS
    • Conda(可选)

    在Ubuntu系统中,可以运行以下命令:

    sudo apt-get install python3 python3-venv python3-pip

    #如果您想安装多个python版本
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli 可以在 macOS 下工作,包括 arm(M1 Chip) 和 x86(Intel Chip) 两种体系结构。但 macOS 下自带的 Python3 可能会遇到一些 Python 自身的问题,推荐使用 homebrew 进行安装:

    brew install python3

    安装 swcli

    使用venv安装

    venv 环境既可以使用 Python3 自带的 venv,也可以使用 virtualenv 工具。

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    使用conda安装

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    👏 现在,您可以在全局环境中使用 swcli 了。

    swcli 的特定场景依赖安装

    # 针对Audio处理, 主要包含soundfile库等
    python -m pip install starwhale[audio]

    # 针对Image处理,主要包含pillow库等
    python -m pip install starwhale[pillow]

    # 针对swcli model server命令
    python -m pip install starwhale[server]

    # 针对内建的Online Serving
    python -m pip install starwhale[online-serve]

    # 安装全部依赖
    python -m pip install starwhale[all]

    更新 swcli

    #适用于venv环境
    python3 -m pip install --upgrade starwhale

    #适用于conda环境
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    卸载swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/zh/0.5.10/swcli/swignore/index.html b/zh/0.5.10/swcli/swignore/index.html index f14eeaba6..c710201b2 100644 --- a/zh/0.5.10/swcli/swignore/index.html +++ b/zh/0.5.10/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    关于 .swignore 文件

    .swignore 文件与 .gitignore, .dockerignore 等文件类似,都是用来定义忽略某些文件或文件夹。.swignore 文件主要应用在 Starwhale 的模型构建过程中。默认情况下,swcli model build 命令 或 starwhale.model.build() Python SDK会遍历指定目录下的所有文件,并自动排除一些已知的、不适合放入模型包中的文件或目录。

    文件格式

    • swignore文件中的每一行指定一个匹配文件和目录的模式。
    • 空行不匹配任何文件,因此它可以作为可读性的分隔符。
    • 星号*匹配除斜杠以外的任何内容。
    • #开头的行作为注释。
    • 支持wildcard的表达,类似 *.jpg, *.png

    默认下自动排除的文件或目录

    如果不想排除这些文件,可以构建模型 (swcli model build 命令) 的时候增加 --add-all 参数。

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv安装目录
    • conda安装目录

    例子

    这是MNIST示例中使用的.swignore文件:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/zh/0.5.10/swcli/uri/index.html b/zh/0.5.10/swcli/uri/index.html index 80575695d..6ab7184a6 100644 --- a/zh/0.5.10/swcli/uri/index.html +++ b/zh/0.5.10/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.10

    Starwhale 资源URI

    提示

    资源 URI 在 Starwhale Client 中被广泛使用。URI 可以引用本地实例中的资源或远程实例中的任何其他资源。 这样 Starwhale Client 就可以轻松操作任何资源。

    concepts-org.jpg

    实例URI

    实例 URI 可以是以下形式之一:

    • local: 指本地的 Standalone 实例.
    • [http(s)://]<hostname or ip>[:<port>]:指向一个 Starwhale Cloud 实例。
    • [cloud://]<cloud alias>:Server或Cloud的实例别名,可以在实例登录阶段配置。
    警告

    “local”不同于“localhost”,前者为 Standalone 实例,而后者是一个 URL ,指向本地运行的 Starwhale Server 实例。

    例子:

    # 登录Starwhale Cloud,别名为swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.cn --alias swcloud

    # 将模型从本地实例复制到云实例
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # 将运行时复制到Starwhale Server实例:http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    项目URI

    项目URI的格式为“[<实例URI>/project/]<project name>”。 如果未指定实例 URI,则使用当前实例。

    例子:

    swcli project select self   # 选择当前实例中的self项目
    swcli project info local/project/self # 查看本地实例中的self项目信息

    模型/数据集/运行时URI

    • 模型URI: [<项目URI>/model/]<model name>[/version/<version id|tag>].
    • 数据集URI: [<项目URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • 运行时URI: [<项目URI>/runtime/]<runtime name>[/version/<version id|tag>].
    提示
    • swcli 支持更加人性化的短版本ID。您可以只键入版本ID的前几个字符,前提是它至少有四个字符长且唯一指向某个版本ID。但是,recover 命令必须使用完整的版本ID。
    • 如果未指定项目URI,将使用默认项目
    • 您始终可以使用版本标签而不是版本ID。

    例子:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # 检查模型信息,模型名称:mnist,版本:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # 使用短版本ID
    swcli model info mnist # 检查mnist模型信息
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # 使用latest的默认tag

    作业URI

    • 格式: [<项目URI>/job/]<job id>.
    • 如果未指定项目URI,将使用默认项目。

    例子:

    swcli job info mezdayjzge3w   # 查看默认实例和默认项目中的mezdayjzge3w版本
    swcli job info local/project/self/job/mezday # 检查本地实例,self项目,作业id:mezday

    默认实例

    当项目URI中的实例部分被省略时,将使用默认实例进行替代。默认实例是由 swcli instance loginswcli instance use 指定的。

    默认项目

    当模型/数据集/运行时/评估URI的项目部分被省略时,将使用默认项目。默认项目是指通过 swcli project use 命令选择的项目。

    - + \ No newline at end of file diff --git a/zh/0.5.12/cloud/billing/bills/index.html b/zh/0.5.12/cloud/billing/bills/index.html index 16583a683..1fcdd655c 100644 --- a/zh/0.5.12/cloud/billing/bills/index.html +++ b/zh/0.5.12/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    账单明细

    账单明细查看

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击"全部账单”,可跳转查看全部账单明细。

    image

    image

    账单明细字段说明

    • 账单编号:账单的唯一标识
    • 资源:用户所使用的各类资源
    • 资源明细:使用资源运行的作业
    • 消费时间:账单开始时间至账单结束时间
    • 计费项:用户所用的产品或服务所含的具体的计费项目
    • 单价:产品或服务的单价
    • 单价单位:产品或服务单价的单位
    • 用量:产品或服务的使用量
    • 用量单位:产品或服务使用量的单位
    • 状态:账单的支付状态,分为:未结清、已结清、未结算
    - + \ No newline at end of file diff --git a/zh/0.5.12/cloud/billing/index.html b/zh/0.5.12/cloud/billing/index.html index c39a7a772..fe97ebb78 100644 --- a/zh/0.5.12/cloud/billing/index.html +++ b/zh/0.5.12/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    产品计费概述

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    计费说明

    计费项

    Starwhale 根据您选购的资源规格(CPU、GPU、内存)及使用时长进行计费。

    计费方式

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    按量付费主要按照资源计费周期计费,在每个结算周期生成账单并从账户中扣除相应费用。创建 Job 时,需要确定计算资源配置。

    请确保您在 Job 运行期间可用余额充足,如果在 Job 运行过程中,您的账户余额不足,会导致 Job 无法完成并按照已运行时长收费。

    开通要求

    按照按量付费创建 Job 前,您的 Starwhale 账户可用余额不得小于一个计费周期。

    说明:账户可用余额=充值金额+代金券金额-已消费金额-已退款金额-已冻结金额

    计费周期

    每5分钟为一个计费周期,不足5分钟则按5分钟计算,按照 Job 运行时长结算。

    计费时长

    从 Job 开始运行时计费,到 Job 运行结束后结束计费

    账单明细

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击"全部账单”,可跳转查看全部账单明细。详细操作流程请参见账单明细

    欠费说明

    如果账号内存在欠费账单,您无法继续使用计算资源。尽快充值结清欠费账单后可继续使用。

    查看欠费金额

    1 登录账户中心

    2 在账户概览可查看欠费金额

    退款说明

    现金余额支持退款

    需要登录账户中心,点击账户管理>充值订单,可退款的充值订单会在操作列显示退款按钮。点击可发起退款申请,详细操作流程请参见申请退款

    - + \ No newline at end of file diff --git a/zh/0.5.12/cloud/billing/recharge/index.html b/zh/0.5.12/cloud/billing/recharge/index.html index ec5800b63..d39bc12ab 100644 --- a/zh/0.5.12/cloud/billing/recharge/index.html +++ b/zh/0.5.12/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    充值和退款

    充值渠道

    Starwhale目前支持通过微信渠道进行充值。

    充值操作步骤

    操作路径:

    1 登录账户中心,点击“去充值”,可跳转至充值页面。

    image

    2 选择或者输入充值金额,充值金额需要大于50元,同时注意支付渠道的限额(超过限额会无法支付成功)。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    充值订单

    查看充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,可在账户概览页面查看最近充值订单,点击“全部订单”,可跳转查看全部充值订单。

    image

    image

    继续支付充值订单

    如您在充值页面,点击“开始充值”后,因某些原因没有支付,可在30分钟内继续支付。

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要继续支付的订单,点击“继续支付”。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    取消充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要取消的订单,点击“取消”,弹出确认弹窗后,点击“”,可取消充值订单。

    image

    - + \ No newline at end of file diff --git a/zh/0.5.12/cloud/billing/refund/index.html b/zh/0.5.12/cloud/billing/refund/index.html index 80aed2cb8..e56228168 100644 --- a/zh/0.5.12/cloud/billing/refund/index.html +++ b/zh/0.5.12/cloud/billing/refund/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.5.12

    账户退款

    申请退款

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要退款的订单,点击“退款”,填写退款原因,确认退款金额,可申请退款。

    提示:退订款项将原路退回,即通过微信支付的订单会退回到支付使用的微信。

    image

    image

    image

    image

    - + \ No newline at end of file diff --git a/zh/0.5.12/cloud/billing/voucher/index.html b/zh/0.5.12/cloud/billing/voucher/index.html index 3fbf5e7d0..e105f1a78 100644 --- a/zh/0.5.12/cloud/billing/voucher/index.html +++ b/zh/0.5.12/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    代金券

    什么是代金券

    代金券是 Starwhale 以虚拟券形式给予客户的资金类权益,可用于抵扣运行时所使用资源的费用。

    如何查看我的代金券?

    登录Starwhale,进入“账户中心>代金券” 可查看代金券的编号,面值,余额,状态等信息。

    image

    点击右侧操作列“使用明细”打开“代金券使用明细页”,查看该代金券的交易时间、编号、支出等抵扣详细记录。

    image

    如何使用代金券?

    代金券适用于抵扣消费,如果您的Starwhale账户内有代金券,系统会优先抵扣代金券金额,代金券余额为0后会抵扣充值余额。

    - + \ No newline at end of file diff --git a/zh/0.5.12/cloud/index.html b/zh/0.5.12/cloud/index.html index 1fe8e4a23..a959ee023 100644 --- a/zh/0.5.12/cloud/index.html +++ b/zh/0.5.12/cloud/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale Cloud 用户指南

    Starwhale Cloud 是托管在公有云上的服务,由 Starwhale 团队负责运维,访问地址是 https://cloud.starwhale.cn

    - + \ No newline at end of file diff --git a/zh/0.5.12/community/contribute/index.html b/zh/0.5.12/community/contribute/index.html index 515e0fb1f..a156386da 100644 --- a/zh/0.5.12/community/contribute/index.html +++ b/zh/0.5.12/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 开源贡献指南

    参与贡献

    Starwhale 非常欢迎来自开源社区的贡献,包括但不限于以下方式:

    • 描述使用过程中的遇到的问题
    • 提交Feature Request
    • 参与Slack和Github Issues讨论
    • 参与Code Review
    • 改进文档和示例程序
    • 修复程序Bug
    • 增加Test Case
    • 改进代码的可读性
    • 开发新的Features
    • 编写Enhancement Proposal

    可以通过以下方式参与开发者社区,获取最新信息和联系Starwhale开发者:

    Starwhale社区使用Github Issues来跟踪问题和管理新特性的开发。可以选择"good first issue"或"help wanted"标签的issue,作为参与开发Starwhale的起点。

    Starwhale资源列表

    代码基本结构

    核心目录组织及功能说明如下:

    • client:swcli和Python SDK的实现,使用Python3编写,对应Starwhale Standalone Instance的所有功能。
      • api:Python SDK的接口定义和实现。
      • cli:Command Line Interface的入口点。
      • base:Python 端的一些基础抽象。
      • core:Starwhale 核心概念的实现,包括Dataset、Model、Runtime、Project、Job、Evaluation等。
      • utils:Python 端的一些工具函数。
    • console:前端的实现,使用React + TypeScript编写,对应Starwhale Cloud Instance的Web UI。
    • server:Starwhale Controller的实现,使用Java编写,对应Starwhale Cloud Instance的后端API。
    • docker:Helm Charts,绝大多数Docker Image的Dockerfile等。
    • docs:Starwhale官方文档。
    • example:示例程序,包含MNIST等例子。
    • scripts:一些Bash和Python脚本,用来进行E2E测试和软件发布等。

    Fork&Clone Starwhale仓库

    您需要fork Starwhale仓库代码并clone到本机,

    搭建针对Standalone Instance的本地开发环境

    Standalone Instance采用Python编写,当要修改Python SDK和swcli时,需要进行相应的环境搭建。

    Standalone本地开发环境前置条件

    • OS:Linux或macOS
    • Python:3.7~3.11
    • Docker:>=19.03 (非必须,当调试dockerize、生成docker image或采用docker为载体运行模型任务时需要)
    • Python隔离环境:Python venv 或 virtualenv 或 conda等都可以,用来构建一个隔离的Python环境

    从源码进行安装

    基于上一步clone到本地的仓库目录:starwhale,并进入到client子目录:

    cd starwhale/client

    使用Conda创建一个Starwhale开发环境,或者使用venv/virtualenv等创建:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    安装Client包及依赖到starwhale-dev环境中:

    make install-sw
    make install-dev-req

    输入swcli --version命令,观察是否安装成功,开发环境的swcli版本是 0.0.0.dev0

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ which swcli
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    本地修改代码

    现在可以对Starwhale代码进行修改,不需要重复安装(make install-sw命令)就能在当前starwhale-dev环境是测试cli或sdk。Starwhale Repo中设置了 .editorconfig 文件,大部分IDE或代码编辑器会自动支持该文件的导入,采用统一的缩进设置。

    执行代码检查和测试

    starwhale 目录中操作,会执行单元测试、client的e2e测试、mypy检查、flake8检查和isort检查等。

    make client-all-check

    搭建针对Cloud Instance的本地开发环境

    Cloud Instance的后端采用Java编写,前端采用React+TypeScript编写,可以按需搭建相应的开发环境。

    搭建前端Console开发环境

    搭建后端Server开发环境

    • 开发语言:Java
    • 项目构建工具:Maven
    • 开发框架:Spring Boot+Mybatis
    • 测试框架:Junit5(其中mock框架为mockito,断言部分使用hamcrest,数据库、web服务等模拟使用Testcontainers)
    • 代码检查:使用maven插件 maven-checkstyle-plugin

    Server开发环境前置条件

    • OS:Linux、macOS或Windows
    • JDK: >=11
    • Docker:>=19.03
    • Maven:>=3.8.1
    • Mysql:>=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(如果没有k8s集群,可以使用Minikube作为开发调试时的备选方案)

    修改代码并增加单测

    现在可以进入到相应模块,对server端的代码进行修改、调整。其中业务功能代码位置为src/main/java,单元测试目录为src/test/java。

    执行代码检查和单元测试

    cd starwhale/server
    mvn clean package

    本地部署服务

    • 前置服务

      • Minikube(可选,无k8s集群时可使用此服务,安装方式可见:Minikube

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • 打包server程序

      若部署server端时,需要把前端同时部署上,可先执行前端部分的构建命令,然后执行'mvn clean package',则会自动将已编译好的前端文件打包进来。

      使用如下命令对程序进行打包:

      cd starwhale/server
      mvn clean package
    • 指定server启动所需的环境变量

      # Minio相关配置
      export SW_STORAGE_ENDPOINT=http://${Minio IP,默认为127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,默认为starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,默认为starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,默认为starwhale}
      export SW_STORAGE_REGION=${Minio region,默认为local}
      # kubernetes配置
      export KUBECONFIG=${.kube配置文件所在路径}\.kube\config

      export SW_INSTANCE_URI=http://${Server服务所在机器IP}:8082
      # Mysql相关配置
      export SW_METADATA_STORAGE_IP=${Mysql IP,默认为127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,默认为3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,默认为starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,默认为starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,默认为starwhale}
    • 部署server服务

      使用IDE或如下方式部署均可。

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • 功能调试

      这里有两种方式对修改的功能进行调试:

      • 使用swagger-ui进行接口调试,访问 /swagger-ui/index.html找到对应的api即可。
      • 或直接在ui访问,进行相应功能的调试(前提是打包时已经按说明将前端代码进行了提前构建)
    - + \ No newline at end of file diff --git a/zh/0.5.12/concepts/index.html b/zh/0.5.12/concepts/index.html index 50d8e3cc6..9d7ce3135 100644 --- a/zh/0.5.12/concepts/index.html +++ b/zh/0.5.12/concepts/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/0.5.12/concepts/names/index.html b/zh/0.5.12/concepts/names/index.html index 548b35da1..ed7c1eb63 100644 --- a/zh/0.5.12/concepts/names/index.html +++ b/zh/0.5.12/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale中的命名规则

    下文的命名是指对Starwhale中的项目、模型、数据集、运行时以及版本标签进行命名。

    名称限制

    • 名称不区分大小写。
    • 名称必须仅由大小写字母“A-Z a-z”、数字“0-9”、连字符“-”、点“.”和下划线“_”组成。
    • 名称应始终以字母或“_”字符开头。
    • 名称的最大长度为80。

    名称唯一性要求

    • 资源名称在其所影响范围内必须是唯一的。例如,项目名称在实例中必须是唯一的,模型名称在其所在项目中必须是唯一的。
    • 同一个项目下同类资源必须使用不同的名称,包括那些已删除的资源。 例如,项目“Apple”不能有两个名为“Alice”的模型,即使其中一个已经被删除。
    • 不同种类的资源可以有相同的名称。 例如,一个项目、一个模型和一个数据集可以同时被命名为“Alice”。
    • 不同项目的资源可以具有相同的名称。 例如,“Apple”项目中的模型和“Banana”项目中的模型可以具有相同的名称“Alice”。
    • 已经被垃圾回收的资源名称可以重复使用。 例如,将项目“Apple”中名称为“Alice”的模型移除并进行垃圾回收后,该项目可以有一个新的同名模型“Alice”。
    - + \ No newline at end of file diff --git a/zh/0.5.12/concepts/project/index.html b/zh/0.5.12/concepts/project/index.html index 3dd713b52..437949760 100644 --- a/zh/0.5.12/concepts/project/index.html +++ b/zh/0.5.12/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale中的项目

    “项目”是组织不同资源(如模型、数据集等)的基本单位。您可以将项目用于不同的目的。例如,您可以为数据科学家团队、产品线或特定模型创建项目。用户通常在日常工作中会参与一个或多个项目。

    Starwhale Server/Cloud 项目按账号分组。Starwhale Standalone 没有帐号概念。所以您不会在 Starwhale Standalone 项目中看到任何帐号前缀。Starwhale Server/Cloud项目可以是“公共”或“私有”。公共项目意味着同一实例上的所有用户在默认情况下都自动成为该项目的“访客”角色。有关角色的更多信息,请参阅Starwhale中的角色和权限

    Starwhale Standalone会自动创建一个“self”项目并将其配置为默认项目。

    - + \ No newline at end of file diff --git a/zh/0.5.12/concepts/roles-permissions/index.html b/zh/0.5.12/concepts/roles-permissions/index.html index 6e3641087..bcd591152 100644 --- a/zh/0.5.12/concepts/roles-permissions/index.html +++ b/zh/0.5.12/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale中的角色和权限

    角色用于为用户分配权限。只有Starwhale Server/Cloud有角色和权限,Starwhale Standalone没有相应概念。系统会自动创建一个管理员角色并分配给默认用户“starwhale”。一些敏感操作只能由具有管理员角色的用户执行,例如在Starwhale Server中创建新的账号。

    每个项目具有三类角色:

    • 管理员Admin - 项目管理员可以读写项目数据并将项目角色分配给用户。
    • 维护者Maintainer - 项目维护者可以读写项目数据。
    • 访客Guest - 项目访客只能读取项目数据。
    动作管理员Admin维护者Maintainer访客Guest
    管理项目成员
    编辑项目
    查看项目
    创建评价
    删除评价
    查看评价
    创建数据集
    更新数据集
    删除数据集
    查看数据集
    创建模型
    更新模型
    删除模型
    查看模型
    创建运行时
    更新运行时
    删除运行时
    查看运行时

    创建项目的用户成为第一个项目管理员。他可以在这之后将角色分配给其他用户。

    - + \ No newline at end of file diff --git a/zh/0.5.12/concepts/versioning/index.html b/zh/0.5.12/concepts/versioning/index.html index 422015d7c..5d2f52420 100644 --- a/zh/0.5.12/concepts/versioning/index.html +++ b/zh/0.5.12/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale中的资源版本控制

    • Starwhale管理所有模型、数据集和运行时的历史记录。对特定资源的每次更新都会附加一个新版本的历史记录。
    • 版本由version id标识。version id是由 Starwhale自动生成的随机字符串,并按其创建时间排序。
    • 版本可以有标签。Starwhale使用版本标签来提供人性化的版本表示。默认情况下,Starwhale会为每个版本附加一个默认标签。默认标记是字母“v”后跟一个数字。对于每个版本化的资源,第一个版本标签始终标记为“v0”,第二个版本标记为“v1”,依此类推。有一个特殊的标签“latest”总是指向最新的版本。删除版本后,将不会重复使用其默认标签。例如,有一个带有标签“v0、v1、v2”的模型。 删除“v2”后,标签将为“v0、v1”。 接下来一个标签将是“v3”而不是“v2”。您可以将自己定义的标签附加到任何版本并随时删除它们。
    • Starwhale使用线性历史,不提供分支。
    • Starwhale资源无法真正回滚。当要恢复某个历史版本时,Starwhale会复制该版本数据并将其作为新版本追加到历史记录的末尾。您可以手动删除和恢复历史版本。
    - + \ No newline at end of file diff --git a/zh/0.5.12/dataset/index.html b/zh/0.5.12/dataset/index.html index d9f014b38..e5d83dea8 100644 --- a/zh/0.5.12/dataset/index.html +++ b/zh/0.5.12/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 数据集

    设计概述

    Starwhale Dataset 定位

    Starwhale Dataset 包含数据构建、数据加载和数据可视化三个核心阶段,是一款面向ML/DL领域的数据管理工具。Starwhale Dataset 能直接使用 Starwhale Runtime 构建的环境,能被 Starwhale ModelStarwhale Evaluation 无缝集成,是 Starwhale MLOps 工具链的重要组成部分。

    根据 Machine Learning Operations (MLOps): Overview, Definition, and Architecture 对MLOps Roles的分类,Starwhale Dataset的三个阶段针对用户群体如下:

    • 数据构建:Data Engineer、Data Scientist
    • 数据加载:Data Scientist、ML Developer
    • 数据可视化:Data Engineer、Data Scientist、ML Developer

    mlops-users

    核心功能

    • 高效加载:数据集原始文件存储在 OSS 或 NAS 等外部存储上,使用时按需加载,不需要数据落盘。
    • 简单构建:既支持从 Image/Video/Audio 目录、json文件和 Huggingface 数据集等来源一键构建数据集,又支持编写 Python 代码构建完全自定义的数据集。
    • 版本管理:可以进行版本追踪、数据追加等操作,并通过内部抽象的 ObjectStore,避免数据重复存储。
    • 数据集分发:通过 swcli dataset copy 命令,实现 Standalone 实例和 Cloud/Server 实例的双向数据集分享。
    • 数据可视化:Cloud/Server 实例的 Web 界面中可以对数据集提供多维度、多类型的数据呈现。
    • 制品存储:Standalone 实例能存储本地构建或分发的 swds 系列文件,Cloud/Server 实例使用对象存储提供集中式的 swds 制品存储。
    • Starwhale无缝集成:Starwhale Dataset 能使用 Starwhale Runtime 构建的运行环境构建数据集。Starwhale Evaluation 和 Starwhale Model 直接通过 --dataset 参数指定数据集,就能完成自动数据加载,便于进行推理、模型评测等环节。

    关键元素

    • swds 虚拟包文件:swdsswmpswrt 不一样,不是一个打包的单一文件,而是一个虚拟的概念,具体指的是一个目录,是 Starwhale 数据集某个版本包含的数据集相关的文件,包括 _manifest.yaml, dataset.yaml, 数据集构建的Python脚本和数据文件的链接等。可以通过 swcli dataset info 命令查看swds所在目录。swds 是Starwhale Dataset 的简写。

    swds-tree.png

    • swcli dataset 命令行:一组dataset相关的命令,包括构建、分发和管理等功能,具体说明参考CLI Reference
    • dataset.yaml 配置文件:描述数据集的构建过程,可以完全省略,通过 swcli dataset build 参数指定,可以认为 dataset.yaml 是build命令行参数的一种配置文件表示方式。swcli dataset build 参数优先级高于 dataset.yaml
    • Dataset Python SDK:包括数据构建、数据加载和若干预定义的数据类型,具体说明参考Python SDK
    • 数据集构建的 Python 脚本:使用 Starwhale Python SDK 编写的用来构建数据集的一系列脚本。

    最佳实践

    Starwhale Dataset 的构建是独立进行的,如果编写构建脚本时需要引入第三方库,那么使用 Starwhale Runtime 可以简化 Python 的依赖管理,能保证数据集的构建可复现。Starwhale 平台会尽可能多的内建开源数据集,让用户 copy 下来数据集后能立即使用。

    命令行分组

    Starwhale Dataset 命令行从使用阶段的角度上,可以划分如下:

    • 构建阶段
      • swcli dataset build
    • 可视化阶段
      • swcli dataset diff
      • swcli dataset head
    • 分发阶段
      • swcli dataset copy
    • 基本管理
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    目前 Cloud/Server 实例中 Web UI 可以对数据集进行可视化展示,只有使用 Python SDK 的 DataType 才能被前端正确解释,映射关系如下:

    • Image:展示缩略图、放大图、MASK类型图片,支持 image/pngimage/jpegimage/webpimage/svg+xmlimage/gifimage/apngimage/avif 格式。
    • Audio:展示为音频wave图,可播放,支持 audio/mp3audio/wav 格式。
    • Video:展示为视频,可播放,支持 video/mp4video/avivideo/webm 格式。
    • GrayscaleImage:展示灰度图,支持 x/grayscale 格式。
    • Text:展示文本,支持 text/plain 格式,可以设置编码格式,默认为utf-8。
    • Binary和Bytes:暂不支持展示。
    • Link:上述几种多媒体类型都支持指定link作为存储路径。

    Starwhale Dataset 数据格式

    数据集由多个行组成,每一行称为一个样本,每个样本包含若干 features ,features 是一个类 dict 结构,对key和value有一些简单的限制[L]:

    • dict的key必须为str类型。
    • dict的value必须是 int/float/bool/str/bytes/dict/list/tuple 等 Python 的基本类型,或者 Starwhale 内置的数据类型
    • 不同样本的数据相同key的value,不需要保持同一类型。
    • 如果value是list或者tuple,其元素的数据类型必须一致。
    • value为dict时,其限制等同于限制[L]

    例子:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    文件类数据的处理方式

    Starwhale Dataset 对文件类型的数据进行了特殊处理,如果您不关心 Starwhale 的实现方式,可以忽略本小节。

    根据实际使用场景,Starwhale Dataset 对基类为 starwhale.BaseArtifact 的文件类数据有两种处理方式:

    • swds-bin: Starwhale 以自己的二进制格式 (swds-bin) 将数据合并成若干个大文件,能高效的进行索引、切片和加载。
    • remote-link: 满足用户的原始数据存放在某些外部存储上,比如 OSS 或 NAS 等,原始数据较多,不方便搬迁或者已经用一些内部的数据集实现进行封装过,那么只需要在数据中使用 link,就能建立索引。

    在同一个Starwhale 数据集中,可以同时包含两种类型的数据。

    - + \ No newline at end of file diff --git a/zh/0.5.12/dataset/yaml/index.html b/zh/0.5.12/dataset/yaml/index.html index e79fb00bf..687bfc419 100644 --- a/zh/0.5.12/dataset/yaml/index.html +++ b/zh/0.5.12/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    dataset.yaml 使用指南

    提示

    dataset.yaml 对于 swcli dataset build 构建数据集的过程是非必要的。

    Starwhale Dataset 构建的时候使用 dataset.yaml,若省略 dataset.yaml,则可以在 swcli dataset build 命令行参数中描述相关配置,可以认为 dataset.yamlbuild 命令行的配置文件化表述。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Dataset的名字String
    handler为一个函数,返回一个Generator或一个可迭代的对象或一个实现 __iter__ 方法的类,格式为 {module 路径}:{类名函数名}String
    desc数据集描述信息String""
    versiondataset.yaml格式版本,目前仅支持填写 1.0String1.0
    attr数据集构建参数Dict
    attr.volume_sizeswds-bin格式的数据集每个data文件的大小。当写数字时,单位bytes;也可以是数字+单位格式,如64M, 1GB等Int或Str64MB
    attr.alignment_sizeswds-bin格式的数据集每个数据块的数据alignment大小,如果设置alignment_size为4k,数据块大小为7.9K,则会补齐0.1K的空数据,让数据块为alignment_size的整数倍,提升page size等读取效率Integer或String128

    使用示例

    最简示例

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    helloworld的数据集,使用dataset.yaml目录中dataset.py文件中的 ExampleProcessExecutor 类进行数据构建。

    MNIST数据集构建示例

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor

    desc: MNIST data and label test dataset

    attr:
    alignment_size: 1k
    volume_size: 4M

    handler为generator function的例子

    dataset.yaml 内容:

    name: helloworld
    handler: dataset:iter_item

    dataset.py 内容:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}

    本例中,handler为一个generator function,Starwhale SDK根据首个yield出来的元素为非Starwhale.Link类型,等同于继承 starwhale.SWDSBinBuildExecutor 类。

    - + \ No newline at end of file diff --git a/zh/0.5.12/evaluation/heterogeneous/node-able/index.html b/zh/0.5.12/evaluation/heterogeneous/node-able/index.html index ed340db08..5e46d1961 100644 --- a/zh/0.5.12/evaluation/heterogeneous/node-able/index.html +++ b/zh/0.5.12/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -24,7 +24,7 @@ 参考 链接

    v0.13.0-rc.1 为例

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    注意: 此操作会在所有的 K8s 节点中运行 NVIDIA 的 device plugin 插件, 如果之前配置过, 则会被更新, 请谨慎评估使用的镜像版本

  • 确认 GPU 可以在集群中发现和使用 参考下边命令, 查看 Jetson 节点的 Capacity 中有 nvidia.com/gpu, GPU 即被 K8s 集群正常识别

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • 制作和使用自定义镜像

    文章前面提到的 l4t-jetpack 镜像可以满足我们一般的使用, 如果我们需要自己定制更加精简或者更多功能的镜像, 可以基于 l4t-base 来制作 相关 Dockerfile 可以参考 Starwhale为mnist制作的镜像

    - + \ No newline at end of file diff --git a/zh/0.5.12/evaluation/heterogeneous/virtual-node/index.html b/zh/0.5.12/evaluation/heterogeneous/virtual-node/index.html index 8b7f3e96f..fa6d342fb 100644 --- a/zh/0.5.12/evaluation/heterogeneous/virtual-node/index.html +++ b/zh/0.5.12/evaluation/heterogeneous/virtual-node/index.html @@ -10,7 +10,7 @@ - + @@ -19,7 +19,7 @@ 此方案被各云厂商广泛用于 serverless 容器集群方案, 比如阿里云的 ASK, Amazon 的 AWS Fargate 等.

    原理

    virtual kubelet 框架将 kubelet 对于 Node 的相关接口进行实现, 只需要简单的配置即可模拟一个节点. 我们只需要实现 PodLifecycleHandler 接口即可支持:

    • 创建, 更新, 删除 Pod
    • 获取 Pod 状态
    • 获取 Container 日志

    将设备加入集群

    如果我们的设备由于资源限制等情况无法作为 K8s 的一个节点进行服务, 那么我们可以通过使用 virtual kubelet 模拟一个代理节点的方式对这些设备进行管理, Starwhale Controller 和设备的控制流如下


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    virtual kubelet 将 Starwhale Controller 下发下来的 Pod 编排信息转化为对设备的控制行为, 比如 ssh 到设备上执行一段命令, 或者通过 USB 或者串口发送一段消息等.

    下面是使用 virtual kubelet 的方式来对一个未加入集群的可以 ssh 的设备进行控制的示例

    1. 准备证书
    • 创建文件 csr.conf, 内容如下
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name
    [req_distinguished_name]
    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names
    [alt_names]
    IP = 1.2.3.4
    • 生成证书
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • 提交证书
    cat vklet.csr| base64 | tr -d "\n" # 输出内容作为 csr.yaml 文件中 spec.request 的内容

    csr.yaml

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************************************************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
     kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    现在我们得到了 vklet-cert.pem

    • 编译 virtual kubelet
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    创建节点的配置文件 mock.json

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    启动 virtual kubelet

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig

    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    至此, 我们使用 virtual kubelet 模拟了一个 100 core + 100G 内存的节点.

    • 增加 PodLifecycleHandler 的实现, 将 Pod 编排中的重要信息转化为 ssh 命令执行, 并且收集日志待 Starwhale Controller 收集

    具体实现可参考 ssh executor

    - + \ No newline at end of file diff --git a/zh/0.5.12/evaluation/index.html b/zh/0.5.12/evaluation/index.html index 7f8558ab7..b3deb85a7 100644 --- a/zh/0.5.12/evaluation/index.html +++ b/zh/0.5.12/evaluation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 模型评测

    设计概述

    Starwhale Evaluation 定位

    Starwhale Evaluation 目标是对模型评测进行全流程管理,包括创建 Job、分发 Task、查看模型评测报告和基本管理等。Starwhale Evaluation 是 Starwhale 构建的 MLOps 工具链使用 Starwhale Model、Starwhale Dataset、Starwhale Runtime 三个基础元素,在模型评测这个场景上的具体应用,后续还会包含 Starwhale Model Serving、Starwhale Training 等应用场景。

    核心功能

    • 可视化展示:swcli 和 Web UI 都提供对模型评测结果的可视化展示,支持多个结果的对比等功能,同时用户可以自定义记录评测中间过程。
    • 多场景适配:不管是在笔记本的单机环境,还是在分布式服务器集群环境,都能使用统一的命令、Python脚本、制品和操作方法进行模型评测,满足不同算力、不同数据量的外部环境要求。
    • Starwhale无缝集成:使用Starwhale Runtime提供的运行环境,将 Starwhale Dataset 作为数据输入,在 Starwhale Model 中运行模型评测任务,不管是在 swcli、Python SDK 还是 Cloud/Server 实例 Web UI中,都能简单的进行配置。

    关键元素

    • swcli model run 命令行: 能够完成模型的批量、离线式评测。
    • swcli model serve 命令行: 能够完成模型的在线评测。

    最佳实践

    命令行分组

    从完成 Starwhale Evaluation 全流程任务的角度,可以将所涉及的命令分组如下:

    • 基础准备阶段
      • swcli dataset build 或 Starwhale Dataset Python SDK
      • swcli model build 或 Starwhale Model Python SDK
      • swcli runtime build
    • 评测阶段
      • swcli model run
      • swcli model serve
    • 结果展示阶段
      • swcli job info
    • 基本管理
      • swcli job list
      • swcli job remove
      • swcli job recover

    job-step-task 抽象

    • job: 一次模型评测任务就是一个 job,一个 job 包含一个或多个 step
    • step: step 对应评测过程中的某个阶段。使用 PipelineHandler 的默认评测过程,step 就是 predict 和 evaluate;用户自定义的评测过程,step 就是使用 @handler, @evaluation.predict, @evaluation.evaluate 修饰的函数。step 之间可以有依赖关系,形成一个DAG。一个 step 包含一个或多个 task。同一 step 中的不同 task,执行逻辑是一致的,只是输入参数不同,常见做法是将数据集分割成若干部分,然后传入每个task 中,task 可以并行执行。
    • task: task 是最终运行的实体。在 Cloud/Server 实例中,一个 task 就是一个Pod的container; 在Standalone 实例中,一个 task 就是一个 Python Thread。

    job-step-task 的抽象是实现 Starwhale Evaluation 分布式运行的基础。

    - + \ No newline at end of file diff --git a/zh/0.5.12/faq/index.html b/zh/0.5.12/faq/index.html index 2a81e7fc5..203ec582d 100644 --- a/zh/0.5.12/faq/index.html +++ b/zh/0.5.12/faq/index.html @@ -10,13 +10,13 @@ - +
    - + \ No newline at end of file diff --git a/zh/0.5.12/getting-started/cloud/index.html b/zh/0.5.12/getting-started/cloud/index.html index a5c34d75a..c478e2c78 100644 --- a/zh/0.5.12/getting-started/cloud/index.html +++ b/zh/0.5.12/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale Cloud入门指南

    Starwhale Cloud运行在阿里云上,域名是 https://cloud.starwhale.cn ,后续我们会推出部署在AWS上的 https://cloud.starwhale.ai 服务,需要注意的是,这是两个相互独立的实例,帐户和数据不共享。您可以选择任何一个开始。

    在开始之前,您需要先安装Starwhale Client(swcli)

    注册Starwhale Cloud并创建您的第一个项目

    您可以直接使用自己的GitHub或微信帐号登录,也可以注册一个新的帐号。如果您使用 GitHub 或 微信帐号登录,系统会要求您提供用户名。

    然后您可以创建一个新项目。在本教程中,我们将使用名称 demo 作为项目名称。

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    登录云实例

    swcli instance login --username <您的用户名> --password <您的密码> --alias swcloud https://cloud.starwhale.cn

    将数据集、模型和运行时复制到Starwhale Cloud

    swcli model copy mnist swcloud/project/demo
    swcli dataset copy mnist swcloud/project/demo
    swcli runtime copy pytorch swcloud/project/demo

    使用 Web UI 运行评估

    console-create-job.gif

    恭喜! 您已完成Starwhale Cloud的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.5.12/getting-started/index.html b/zh/0.5.12/getting-started/index.html index c787bf40d..fea025c01 100644 --- a/zh/0.5.12/getting-started/index.html +++ b/zh/0.5.12/getting-started/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    入门指南

    首先,您需要安装Starwhale Client(swcli),可以运行如下命令:

    python3 -m pip install starwhale

    更多详细信息请参阅swcli安装指南

    根据您使用的实例类型,您可以参考以下三个入门指南:

    • Starwhale Standalone入门指南 - 本指南可帮助您在台式PC/笔记本电脑上运行一个MNIST评估。这是开始使用Starwhale最快最简单的方法。
    • Starwhale Server入门指南 - 本指南可帮助您在私有服务器上安装Starwhale Server并运行一个MNIST评估。在本指南结束时,您将拥有一个Starwhale Server实例,您可以在其中管理您的数据集和模型。
    • Starwhale Cloud入门指南 - 本指南可帮助您在Starwhale Cloud上创建帐户并运行MNIST评估。这是体验所有Starwhale功能的最简单方法。
    - + \ No newline at end of file diff --git a/zh/0.5.12/getting-started/runtime/index.html b/zh/0.5.12/getting-started/runtime/index.html index 023b1bb10..31ffddf41 100644 --- a/zh/0.5.12/getting-started/runtime/index.html +++ b/zh/0.5.12/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale Runtime入门指南

    本文演示如何搭建Pytorch环境的Starwhale Runtime以及如何在不同环境中使用它。该runtime可以满足Starwhale中六个例子的依赖需求:mnist、speech commands、nmt、cifar10、ag_news、PennFudan。相关代码链接:example/runtime/pytorch

    您可以从本教程中学到以下内容:

    • 如何构建Starwhale Runtime。
    • 如何在不同场景下使用Starwhale Runtime。
    • 如何发布Starwhale Runtime。

    前置条件

    基础环境

    运行以下命令以克隆示例代码:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch-cn-mirror #非中国大陆网络可使用pytorch例子

    构建Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    在Standalone Instance中使用Starwhale Runtime

    在shell中使用Starwhale Runtime

    # 激活runtime
    swcli runtime activate pytorch-cn-mirror

    swcli runtime activate会下载runtime的所有python依赖,并在当前shell环境中激活该环境。这个过程可能需要很长时间。

    当runtime被激活时,所有依赖项都已在您的python环境中准备就绪,类似于virtualenv的source venv/bin/activate或者conda的conda activate命令。如果您关闭了shell或切换到另一个shell,则下次使用之前需要重新激活这个runtime。

    在swcli中使用Starwhale Runtime

    # 模型构建中使用runtime
    swcli model build . --runtime pytorch-cn-mirror
    # 数据集构建中使用runtime
    swcli dataset build . --runtime pytorch-cn-mirror
    # 模型评测中使用runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch-cn-mirror

    将 Starwhale Runtime 复制到另一个实例

    您可以将运行时复制到Server/Cloud实例,然后可以在Server/Cloud实例中使用或由其他用户下载。

    # 将runtime复制到名为“pre-k8s”的Server实例
    ❯ swcli runtime copy pytorch-cn-mirror cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/zh/0.5.12/getting-started/server/index.html b/zh/0.5.12/getting-started/server/index.html index 5d8922d07..23bed68a9 100644 --- a/zh/0.5.12/getting-started/server/index.html +++ b/zh/0.5.12/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale Server入门指南

    安装Starwhale Server

    安装 Starwhale Server,参见安装指南

    创建您的第一个项目

    登录服务器

    打开浏览器并在地址栏中输入服务器的 URL。 使用默认用户名(starwhale)和密码(abcd1234)登录。

    console-artifacts.gif

    创建一个新项目

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    将数据集、模型和运行时复制到Starwhale Server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    使用Web UI运行模型评估

    使用浏览器打开“demo”项目并创建一个新的评估。

    console-create-job.gif

    恭喜! 您已完成Starwhale Server的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.5.12/getting-started/standalone/index.html b/zh/0.5.12/getting-started/standalone/index.html index 968e8fe12..3d4e3cd3f 100644 --- a/zh/0.5.12/getting-started/standalone/index.html +++ b/zh/0.5.12/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale Standalone入门指南

    Starwhale Client(swcli)安装完成后,您就可以使用Starwhale Standalone。

    我们也提供对应的Jupyter Notebook例子,可以在 Google Colab 或本地的 vscode/jupyterlab 中试用。

    下载例子

    通过以下方式克隆Starwhale项目来下载Starwhale示例:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    为了节省例子的下载时间,我们执行git clone命令时,忽略了git-lfs,并只保留最近一次的commit信息。我们选用ML/DL领域的HelloWorld程序-MNIST来介绍如何从零开始构建数据集、模型包和运行环境,并最终完成模型评测。接下来的操作都在 starwhale 目录中进行。

    核心工作流程

    构建 Pytorch 运行时

    运行时示例代码位于example/runtime/pytorch目录中。

    • 构建Starwhale运行时包:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      提示

      当首次构建Starwhale Runtime时,由于需要创建venv或conda隔离环境,并下载相关的Python依赖,命令执行需要花费一段时间。时间长短取决与所在机器的网络情况和runtime.yaml中Python依赖的数量。建议合理设置机器的 ~/.pip/pip.conf 文件,填写缓存路径和适合当前网络环境的pypi mirror地址。

      处于中国大陆网络环境中的用户,可以参考如下配置:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • 检查您本地的Starwhale运行时:

      swcli runtime list
      swcli runtime info pytorch

    构建模型

    模型示例代码位于 example/mnist 目录中。

    • 下载预训练模型文件:

      cd example/mnist
      CN=1 make download-model
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建一个Starwhale模型:

      swcli model build example/mnist --runtime pytorch
    • 检查您本地的Starwhale模型:

      swcli model list
      swcli model info mnist

    构建数据集

    数据集示例代码位于 example/mnist 目录中。

    • 下载MNIST原始数据:

      cd example/mnist
      CN=1 make download-data
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建Starwhale数据集:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • 检查您本地的Starwhale数据集:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    运行评估作业

    • 创建评估工作

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • 检查评估结果

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    恭喜! 您已完成Starwhale Standalone的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.5.12/index.html b/zh/0.5.12/index.html index 6f19f47f8..3dd142be6 100644 --- a/zh/0.5.12/index.html +++ b/zh/0.5.12/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    什么是Starwhale

    概述

    Starwhale是一个 MLOps/LLMOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    Starwhale能够帮助您:

    • 跟踪您的训练/测试数据集历史记录,包括所有数据项及其相关标签,以便您轻松访问它们。
    • 管理您可以在团队中共享的模型包。
    • 在不同的环境中运行您的模型,无论是在 Nvidia GPU服务器上还是在嵌入式设备(如 Cherry Pi)上。
    • 为您的模型快速创建配备交互式 Web UI的在线服务。

    同时,Starwhale 是一个开放的平台,您可以创建插件来满足自己的需求。

    部署选项

    Starwhale的每个部署称为一个实例。所有实例都可以通过Starwhale Client(swcli)进行管理。

    您可以任选以下实例类型之一开始使用:

    • Starwhale Standalone - Starwhale Standalone 本质上是一套存储在本地文件系统中的数据库。它由 Starwhale Client(swcli)创建和管理。您只需安装 swcli 即可使用。目前,一台机器上的每个用户只能拥有一个Starwhale Standalone 实例。我们建议您使用 Starwhale Standalone 来构建和测试您的数据集和模型,然后再将它们推送到 Starwhale Server/Cloud 实例。
    • Starwhale Server - Starwhale Server 是部署在您本地服务器上的服务。除了 Starwhale Client(swcli)的文本交互界面,Starwhale Server还提供 Web UI供您管理数据集和模型,以及在Kubernetes集群中运行模型并查看运行结果。
    • Starwhale Cloud - Starwhale Cloud 是托管在公共云上的服务。 通过在https://cloud.starwhale.cn注册一个账号,您就可以使用Starwhale,而无需安装、运行和维护您自己的实例。 Starwhale Cloud 还提供公共资源供您下载,例如一些流行的开源数据集、模型和运行时。查看 Starwhale Cloud 实例上的 “starwhale/public”项目以获取更多详细信息。

    在您决定要使用的实例类型时,请考虑以下因素:

    实例类型部署位置维护者用户界面可扩展性
    Starwhale Standalone您的笔记本电脑或本地服务器不需要命令行不可扩展
    Starwhale Server您的数据中心您自己Web UI和命令行可扩展,取决于您的 Kubernetes 集群
    Starwhale Cloud公共云,如AWS或阿里云Starwhale团队Web UI和命令行可扩展,但目前受到云上免费可用资源的限制
    - + \ No newline at end of file diff --git a/zh/0.5.12/model/index.html b/zh/0.5.12/model/index.html index ce2da4d6a..4908721ce 100644 --- a/zh/0.5.12/model/index.html +++ b/zh/0.5.12/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 模型

    Starwhale 模型是一种机器学习模型的标准包格式,可用于多种用途,例如模型微调、模型评估和在线服务。 Starwhale 模型包含模型文件、推理代码、配置文件等等。

    创建一个 Starwhale 模型

    创建 Starwhale 模型有两种方法:通过 swcli 或通过 SDK

    使用 swcli 创建 Starwhale 模型

    使用 swcli 创建 Starwhale 模型之前,您可以定义一个model.yaml,其中描述了关于Starwhale模型的一些必要信息,然后运行以下命令:

    swcli model build . --model-yaml /path/to/model.yaml

    有关该命令和 model.yaml 的更多信息,请参阅swcli参考。需要注意的是,model.yaml 是非必要的。

    使用 Python SDK 创建 Starwhale 模型

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    管理 Starwhale 模型

    使用 swcli 管理 Starwhale 模型

    命令说明
    swcli model list列出项目中所有Starwhale模型
    swcli model info显示有关Starwhale模型的详细信息
    swcli model copy将Starwhale模型复制到另一个位置
    swcli model remove删除Starwhale模型
    swcli model recover恢复之前删除的Starwhale模型

    使用 Web 界面管理 Starwhale 模型

    管理 Starwhale 模型的历史版本

    Starwhale 模型是版本化的。关于版本的基本信息可以参考 Starwhale中的资源版本控制

    使用 swcli 管理 Starwhale 模型的历史版本

    命令说明
    swcli model history列出Starwhale模型的所有版本
    swcli model info显示某个Starwhale模型版本的详细信息
    swcli model diff比较两个版本的Starwhale模型
    swcli model copy复制某个Starwhale模型版本到新的版本
    swcli model remove删除某个Starwhale模型版本
    swcli model recover恢复以前删除的Starwhale模型版本

    模型评估

    使用swcli进行模型评估

    命令说明
    swcli model run指定某个Starwhale模型进行模型评估

    存储格式

    Starwhale模型是一个打包了原始目录的tar文件。

    - + \ No newline at end of file diff --git a/zh/0.5.12/model/yaml/index.html b/zh/0.5.12/model/yaml/index.html index e32eef55a..4ca59bd1f 100644 --- a/zh/0.5.12/model/yaml/index.html +++ b/zh/0.5.12/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    model.yaml 使用指南

    提示

    model.yaml 对于 swcli model build 构建模型的过程是非必要的。

    Starwhale Model 构建时,若使用 swcli model build 命令,可以通过 --model-yaml 参数指定符合特定格式的yaml文件,简化模型构建的参数指定。

    即使不指定 --model-yaml 参数,swcli model build 也会自动寻找 ${workdir} 目录下的 model.yaml 文件,会提取其中的参数。swcli model build 命令行中指定参数优先级大于 model.yaml 中的等价配置,可以认为 model.yamlbuild 命令行的配置文件化表述。

    当使用 Python SDK 方式构建 Starwhale 模型时,model.yaml 文件不生效。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Model 的名字,等价于 --name 参数。String
    run.modules模型构建时搜索的Python Moduels,可以指定多个模型运行的入口点,格式为 Python 可 Imported 路径。等价于 --module 参数。List[String]
    run.handlerrun.modules的曾用写法,只能指定一个模型运行的入口点,已废弃String
    versionmodel.yaml格式版本,目前仅支持填写 1.0String1.0
    desc模型描述信息,等价于 --desc 参数。String

    使用示例

    name: helloworld
    run:
    modules:
    - src.evaluator
    desc: "example yaml"

    名称为 helloworld 的 Starwhale 模型,搜索 swcli model build {WORKDIR} 命令中 ${WORKDIR} 目录相对的 src/evaluator.py 文件中被 @evaluation.predict, @evaluation.evaluate 和 @handler 修饰的函数, 或继承自 PipelineHandler 的类,这些函数或类会被加入 Starwhale 模型可运行的入口点列表中,在 swcli model run 或 Web UI 运行时,选择对应的入口点(handler)运行模型。

    model.yaml 是非必要的,yaml 中定义参数可以在 swcli 命令行参数中指定。

    swcli model build . --model-yaml model.yaml

    等价于:

    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/sdk/dataset/index.html b/zh/0.5.12/reference/sdk/dataset/index.html index 600bc0dc3..0d6e1e60b 100644 --- a/zh/0.5.12/reference/sdk/dataset/index.html +++ b/zh/0.5.12/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 数据集 SDK

    dataset

    获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    参数

    • uri: (str 或 Resource, required)
      • Dataset URI 格式的字符串或 Resource 对象。
    • create: (str, optional)
      • 数据集创建模式,包括 auto, emptyforbid 三种方式。
        • auto 模式: 如果数据集已经存在,不会自动创建数据集;如果数据集不存在,则自动创建数据集。
        • empty 模式: 如果数据集已经存在,则抛出异常;如果数据集不存在,则自动创建数据集。
        • forbid 模式: 如果数据集已经存在,则不做任何事情;如果数据集不存在,则抛出异常。forbid 模式能确保数据集存在。
      • auto 模式是默认值。
    • readonly: (bool, optional)
      • 对于已经存在的数据集,可以指定 readonly=True 保证数据集以只读方式加载。
      • 默认值为 False

    使用示例

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # return False, "mnist" dataset is not existing.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset 实现 Starwhale 数据集的抽象,能够对Standalone/Server/Cloud 实例上的数据集进行操作。

    from_huggingface

    from_huggingface 是一个 classmethod 方法,能够将 Huggingface 上的数据集转化为 Starwhale 数据集。

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称。
    • repo: (str, required)
      • Huggingface 的数据集 repo 名称。
    • subset: (str, optional)
      • Huggingface的数据集 subset 名称,如果HF数据集有多个 subsets, 您务必要指定一个 subset。
    • split: (str, optional)
      • Huggingface的数据集中 Split 名称。如果没有指定 split,则数据集中所有的 splits 数据都会被构建。
    • revision: (str, optional)
      • Huggingface的数据集版本,默认是 main,即main分支的最新一次提交。参数接受branch, tag 或 commit hash。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • cache: (bool, optional)
      • 是否使用 Huggingface 的本地缓存。
      • 默认使用缓存。
      • 缓存 = 下载文件缓存 + 本地Huggingface 数据集缓存。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json 是一个 classmethod 方法,能够将 json 字符串转化为 Starwhale 数据集。

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称
    • json_text: (str, required)
      • json 字符串,from_json 函数会序列化该字符串为 Python 对象,然后开始构建 Starwhale 数据集。
    • field_selector: (str, optional)
      • 可以提取 json_text 中特定的 array 结构。
      • 默认从 json 的根提取数据。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder 是一个 classmethod 方法,能够读取指定目录中的 Image/Video/Audio 数据,并将其自动转化为 Starwhale 数据集。该函数支持如下特性:

    • 能够递归的搜索目标目录及子目录
    • 支持三种类型的文件提取:
      • image: 支持 png/jpg/jpeg/webp/svg/apng 图片类型。图片文件会被转化为 Starwhale.Image 类型。
      • video: 支持 mp4/webm/avi 视频类型。视频文件会被转化为 Starwhale.Video 类型。
      • audio: 支持 mp3/wav 音频类型。音频文件会被转化为 Starwhale.Audio 类型。
    • 每个文件对应数据集的一条记录,文件对应的数据集字段名称为 file
    • auto_label=True,则会使用父目录的名称作为该条数据的标签,对应 label 字段。根目录下的文件,则不会被打标签。
    • 若存在与 image/video/audio 同名的 txt 文件,则该文件内容会被作为 caption 字段内容存放到数据集中。
    • 若根目录存在 metadata.csvmetadata.jsonl 文件,则会自动读取文件的内容,并将其通过文件路径名作为关联,存入数据集中,可以用来指定 meta 信息。
      • metadata.csvmetadata.jsonl 文件是互斥的,当都存在的时候,程序会抛出异常。
      • metadata.csvmetadata.jsonl 每行记录中需要包含 file_name 字段,指向对应文件的路径。
      • metadata.csvmetadata.jsonl 对于数据集构建是可选的。
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • folder: (str|Path, required)
      • 文件夹路径
    • kind: (str|DatasetFolderSourceType, required)
      • 数据类型设置,目前支持 image, videoaudio 三种类型。
      • 会根据设置的 kind 值,在 folder 中递归寻找对应类型的文件。其他类型文件会被忽略掉。
    • name: (str|Resource, optional)
      • 数据集名称。
      • 若不指定,则使用目录名称作为数据集名称。
    • auto_label: (bool, optional)
      • 是否根据父目录的名字自动对每条记录打标签。
      • 默认为 True
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例 ${folder-example}

    • 函数调用示例

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • caption 示例

      folder/dog/1.png
      folder/dog/1.txt

      1.txt 中的内容,会填充到 1.png 所在行中 caption 字段中。

    • metadata.csvmetadata.jsonl 示例

      metadata.csv 内容:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl 内容:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • 自动 label 示例

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

      生成的数据集中包含四条数据,分为 dogcat 两类。

    __iter__

    __iter__ 是一个 method 方法,能够对数据集进行迭代。

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label 和 img 是 mnist数据集中的数据列
    print(item.features.img)

    batch_iter

    batch_iter 是一个 method 方法,能够批量地进行数据集迭代。

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    参数

    • batch_size: (int, optional)
      • batch的大小,默认值为1。
    • drop_not_full: (bool, optional)
      • 最后一组batch数据数量小于 batch_size 时,该组数据是否会被抛弃掉。
      • 默认是不抛弃。

    使用示例

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ 是一个 method 方法,能提供数据集中某些行数据的获取,操作方式类似 Python 的 dict 和 list 类型。

    from starwhale import dataset

    ds = dataset("mock-int-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-str-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ 是一个 method 方法,能提供数据集中行数据的更新,操作方式类似 Python 的 dict 类型。__setitem__ 支持多线程并行插入数据。

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    参数

    • key: (int|str, required)
      • key 即为数据集中每行的 index,类型为 int 或 str,一个数据集中只接受一种类型。
    • value: (DataRow|tuple|dict, required)
      • value 即为数据集中每行的 features,一般建议用 Python 的 dict 类型。

    使用示例

    • 插入数据

    test 数据中插入两条数据,index分别为 testtest2

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • 并行插入数据
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ 是一个 method 方法,用来删除数据集中的某些行数据。

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append 是一个 method 方法,用来向数据集中添加数据,类似 Python list 的 append 函数。

    • 添加 features dict,每行数据自动 index 为 int 类型,从0开始自增。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • 添加 index + features dict,数据集中每行数据的 index 不会被自动处理。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend 是一个 method 方法,用来向数据集中批量添加数据,类似 Python list 的 extend 函数。

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit 是一个 method 方法,调用 commit 时会将当前缓存中数据 flush 到存储中,并产生一个数据集版本,后续可以用这个版本信息加载相应的数据集内容。

    对于一个数据集,如果添加一些数据后,并没有调用 commit 方法,而是直接调用 close 或退出进程,那么这些数据依旧会写入到数据集中,只是不会生成一个新的数据集版本。

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    参数

    • tags: (List(str), optional)
      • 指定 tags,可以指定多个tag。
    • message: (str, optional)
      • 提交信息,默认为空。
    • force_add_tags: (bool, optional)
      • 当给该版本添加标签时,对于 server/cloud 实例,若标签已经被应用到其他数据集版本时,可以使用 force_add_tags=True 参数强制将标签添加到此版本上,否则会抛出异常。
      • 默认为 False
    • ignore_add_tags_errors: (bool, optional)
      • 忽略添加标签时抛出的异常。
      • 默认为 False

    使用示例

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly 是一个 property 属性,表示数据集是否只读,返回值为 bool 类型。

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version 是一个 property 属性,字符串类型。

    • 当加载一个已经存在的数据集时,返回的是数据集加载的对应版本。
    • 对加载一个不存在的数据集时,返回的是 pending_commit_version

    pending_commit_version

    pending_commit_version 是一个 property 属性,字符串类型,表示待提交的版本信息。当调用 commit 方法后,pending_commit_version 会变成 committed_version

    committed_version

    committed_version 是一个 property 属性,字符串类型,表示已经调用 commit 方法后生成的版本信息。当没有调用 commit 方法时,访问该属性时程序会抛出异常。

    remove

    remove 是一个 method 方法,等价于 swcli dataset remove 命令,能够删除数据集。

    def remove(self, force: bool = False) -> None:

    recover

    recover 是一个 method 方法,等价于 swcli dataset recover 命令,能够对软删除且未GC的数据集进行恢复。

    def recover(self, force: bool = False) -> None:

    summary

    summary 是一个 method 方法,等价于 swcli dataset summary 命令,返回数据集摘要信息。

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history 是一个 method 方法,等价于 swcli dataset history 命令,返回数据集的历史记录。

    def history(self) -> t.List[t.Dict]:

    flush

    flush 是一个 method 方法,能够将内存中暂存的数据刷到持久化存储中。commitclose 方法会自动调用 flush

    close

    close 是一个 method 方法,关闭已经打开的数据集相关链接。Dataset 也实现了 contextmanager,使用 with 语法后可以自动关闭数据集,不需要主动调用 close 方法。

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head 是一个 method 方法,能够显示数据集前n行数据,等价于 swcli dataset head 命令。

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> t.List[DataRow]:

    fetch_one

    fetch_one 是一个 method 方法,获得数据集的第一条记录,相当于 head(n=1)[0]

    list

    list 是一个 classmethod 方法,能够列出项目 URI 下的 Starwhale 数据集,等价于 swcli dataset list 命令。

    @classmethod
    def list(
    cls,
    project_uri: t.Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> t.Tuple[t.List[t.Dict[str, t.Any]], t.Dict[str, t.Any]]:

    copy

    copy 是一个 method 方法,能够复制数据到其他实例上,等价于 swcli dataset copy 命令。

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    参数

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • 从远端复制到本地 Standalone 实例时,可以指定对应的项目 URI。
    • force: (bool, optional)
      • 当目标实例上已经有相同版本的数据集时,是否强制覆盖。
      • 默认不覆盖。
      • 当复制标签到远端 Server/Cloud 实例时,若标签已经被其他版本使用,使用 force=True 参数可以强制变更标签到本版本上。
    • mode: (str, optional)
      • 数据集复制模式,分为 patch 模式 和 overwrite 模式,默认为 patch
      • patch: 使用补丁方式更新数据集,只更新计划变更的行和列,在新生成的版本中仍能读取到未受影响的行和列。
      • overwrite: 使用覆盖方式更新数据集,会将原来的所有行都删除,然后再进行更新,在新生成的版本中读取不到老数据。但请放心,删除的数据依旧可以通过旧版本进行访问。
    • ignore_tags (List[str], optional)
      • 复制数据集时,可以忽略的自定义标签。
      • 默认会复制所有用户自定义标签到其他实例中。
      • 复制标签会忽略 latest、^v\d+$ 内建标签。

    使用示例

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch 是一个 method 方法,能够将 Starwhale 数据集转化为 Pytorch 的 torch.utils.data.Dataset 类型,可以进一步传给 torch.utils.data.DataLoader 进行使用。

    需要注意的是,to_pytorch 函数返回的是 Pytorch 的 IterableDataset

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    参数

    • transform: (callable, optional)
      • 支持用户自定义变换函数,能够按需转化数据类型。
    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。
    • skip_default_transform: (bool, optional)
      • 如果没有设置 transform, 默认状态下会使用 Starwhale 内建的 transform 函数,对数据进行转化,可以通过 skip_default_transform 参数禁用内建数据转化。

    使用示例

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow 是一个 method 方法,能够将 Starwhale 数据集转化为 Tensorflow 的 tensorflow.data.Dataset 类型。

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    参数

    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。

    使用示例

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config 是一个 method 方法,用来设置 Starwhale 数据集中 blob 的相关属性信息。需要在变更数据之前调用。

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    参数

    • volume_size: (int|str, optional)
      • 单个数据集 blob 文件的大小。
      • 默认值为 64MB。
      • 当类型为 int 时,单位为 Bytes。
      • 当类型为 str 时,格式类似 1GB, 64MB。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的大小
      • 默认值为 128个字节。
      • volume_size 一样的类型解析。

    使用示例

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config 是一个 method 方法,用来设置 Starwhale 数据集 loader 的过程参数。

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    参数

    • num_workers: (int, optional)
      • 加载数据集的 worker 数目,默认为2。
    • cache_size: (int, optional)
      • 预加载的数据的数量,默认为20条。
    • field_transformer: (dict, optional)
      • features 字段名称的变换。

    使用示例

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/sdk/evaluation/index.html b/zh/0.5.12/reference/sdk/evaluation/index.html index 1533c9a85..65c99ea73 100644 --- a/zh/0.5.12/reference/sdk/evaluation/index.html +++ b/zh/0.5.12/reference/sdk/evaluation/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.5.12

    Starwhale 模型评测 SDK

    @evaluation.predict

    @evaluation.predict 是一个修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取本地或远端的数据集,将数据集中的数据以单条或批量的方式,传递给 evaluation.predict 修饰的函数。
    • 通过多副本的设置,实现分布式数据集消费的功能,能以水平扩展的方式缩短模型评测任务的用时。
    • 自动将函数返回值和数据集的输入 features 存储到 results 表中,方便Web UI展示和进一步的 evaluate 阶段使用。
    • 每单条或每批量组数据会调用一次被修饰的函数,完成推理过程。

    控制参数

    • resources: (dict, optional)
      • 定义 predict 每个任务在 Server 实例上运行时所需要的资源,包括 mem、cpu 和 nvidia.com/gpu 三种类型。
        • mem: 单位为 Bytes,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"mem": {"request": 100 * 1024, "limit": 200 * 1024}}。
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"mem": 100 * 1024} 等价于 resources={"mem": {"request": 100 * 1024, "limit": 100 * 1024}}。
        • cpu: 单位为 CPU 核心数,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"cpu": {"request": 1, "limit": 2}}。
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"cpu": 1.5} 等价于 resources={"cpu": {"request": 1.5, "limit": 1.5}}。
        • nvidia.com/gpu: 单位为 GPU显卡数,支持 int 类型。
          • nvidia.com/gpu 不支持设置 request 和 limit,仅支持单个数字。
      • 需要注意: resource 参数目前仅在 Server 实例中生效。Cloud 实例,通过在提交评测任务时,选择对应的资源池达到相同的作用。Standalone 实例完全不支持该特性。
    • replicas: (int, optional)
      • predict 运行的副本数。
      • predict 相当于定义了一个 Step, 在该 Step 中有若干等价的 Task,每个 Task 在 Cloud/Server 实例上运行实体是 Pod,在 Standalone 实例上运行实体是 Thread。
      • 当指定多个副本时,这些副本是等价的,它们会共同消费选定的数据集,实现分布式数据集消费的目的,可以理解为某个数据集中的某行数据,只会被一个 predict 副本读取。
      • 默认值为1。
    • batch_size: (int, optional)
      • 批量将数据集中的数据传递进函数中。
      • 默认值为1。
    • fail_on_error: (bool, optional)
      • 当被修饰的函数抛出异常时,是否中断所有模型评测。如果预期某些“异常”数据会导致评测失败,但不想中断整体评测,可以设置 fail_on_error=False
      • 默认为 True
    • auto_log: (bool, optional)
      • 是否自动记录函数返回值和数据集输入 features 到 results 表中。
      • 默认为 True
    • log_mode: (str, optional)
      • auto_log=True 时,可以通过设置 log_mode 参数,定义以 plainpickle 方式记录函数返回值。
      • 默认为 pickle 方式。
    • log_dataset_features: (List[str], optional)
      • auto_log=True 时,可以通过该参数,选择性的记录数据集中的某些 features 。
      • 默认会记录所有的 features 。
    • needs: (List[Callable], optional)
      • 定义该任务运行的前置条件,可以用 needs 语法实现 DAG。
      • needs 接受被 @evaluation.predict, @evaluation.evaluate 和 @handler 修饰的函数。
      • 默认为空,不依赖任何其他任务。

    传入参数

    被修饰的函数,需要定义一些输入参数,用来接受数据集内容等,包含如下模式:

    • 单个 data 参数:

      • data 为 一个类 dict 类型,能够读取到数据集的 features 内容。
      • batch_size=1 或不设置 batch_size 时,可以通过 data['label']data.label 方式读取 label feature。
      • 当设置 batch_size > 1 时,data 为一个 list。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external 参数方式:

      • data 为数据集的features。
      • external 为一个 dict 类型,包含 index, index_with_dataset, dataset_info, context 和 dataset_uri 这些内建属性,可以用来做更细粒度的处理。
        • index: 数据集对应行的 index 信息。
        • index_with_dataset: 适用于多个数据集输入的时候做 index 区分。
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo 对象。
        • context: starwhale.Context 对象。
        • dataset_uri: starwhale.base.uri.resource.Resource 对象。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw 方式:

      • data 为数据集的features。
      • kw 可以读取到 external
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs 方式:

      • args的第一个元素为 data
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs 方式:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args 方式:

      • 此方式无法读取到 external 信息。
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    使用示例

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "mem": 200 * 1024 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate 是一个修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取 predict 阶段记录到 results 表的数据,并以迭代器的方式传入函数中。
    • evaluate 阶段只会运行一个副本,无法像 predict 阶段一样定义 replicas 参数。

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。
      • 绝大多数场景中,会依赖一个 @evaluation.predict 修饰的函数。
    • use_predict_auto_log: (bool, optional)
      • 默认为 True,传入一个能够遍历 predict 结果的迭代器到函数中。

    输入参数

    • use_predict_auto_log=True(默认)时,传入一个能够遍历 predict 结果的迭代器到函数中。
      • 迭代出来的对象为一个字典,包含 outputinput 两个key。
        • outputpredict 阶段函数返回的元素。
        • input 为推理时对应使用的数据集的 features ,为一个字典类型。
    • use_predict_auto_log=False 时,不传入任何参数到函数中。

    使用示例

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    evaluation.log

    evaluation.log 是一个函数,记录某些评测指标到特定表中,之后可以通过 Server/Cloud 实例的 Web 页面中查看相关的表。

    @classmethod
    def log(
    cls, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    参数

    • category: (str, required)
      • 记录的类别,该值会被作为 Starwhale Datastore 的表名的后缀。
      • 一个 category 会对应一张 Starwhale Datastore 的表,这些表会以评测任务ID作为隔离区分,相互不影响。
    • id: (str|int, required)
      • 记录的ID,表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • 字典类型,key-value 方式记录指标。

    使用示例

    from starwhale import evaluation

    evaluation.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation.log("ppl", "1", {"a": "test", "b": 1})

    evaluation.log_summary

    evaluation.log_summary 是一个函数,记录某些指标到 summary 表中,Server/Cloud 实例评测页面显示的就是 summary 表的数据。 每次调用,Starwhale 都会自动以此次评测的唯一ID作为表的行ID进行更新,可以在一次评测过程中多次调用该函数,用来更新不同的列。

    每个项目中有一张 summary 表,所有该项目下的评测任务都会将 summary 信息写入该表中。

    @classmethod
    def log_summary(cls, *args: t.Any, **kw: t.Any) -> None:

    使用示例

    from starwhale import evaluation

    evaluation.log_summary(loss=0.99)
    evaluation.log_summary(loss=0.99, accuracy=0.99)
    evaluation.log_summary({"loss": 0.99, "accuracy": 0.99})

    evaluation.iter

    evaluation.iter 是一个函数,返回一个迭代器,用来迭代式的读取某些模型评测表中的数据。

    @classmethod
    def iter(cls, category: str) -> t.Iterator:

    参数

    • category: (str, required)
      • evaluation.log 函数中的 category 参数含义一致。

    使用示例

    from starwhale import evaluation

    results = [data for data in evaluation.iter("label/0")]

    @handler

    @handler 是一个修饰器,具备如下功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 可以控制副本数。
    • 多个 Handlers 可以通过依赖关系,生成DAG,便于控制执行流程。
    • 可以对外暴露端口,以类似 Web Handler 方式运行。

    @fine_tune, @evaluation.predict 和 @evaluation.evaluate 可以认为是 @handler 在某些特定领域的应用,@handler 是这些修饰器的底层实现。@handler 更为基础和灵活。

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。
    • replicas: (int, optional)
      • @evaluation.predict 中的 replicas 参数定义保持一致。
    • name: (str, optional)
      • 显示 handler 时候用的名字。
      • 若不指定,则用修饰函数的名字。
    • expose: (int, optional)
      • 对外暴露的端口,当运行一个 Web Handler的时候,需要声明暴露的端口。
      • 默认为0,表示不暴露任何端口。
      • 目前只能暴露一个端口。
    • require_dataset: (bool, optional)
      • 定义此 Handler 运行时,是否需要数据集。
      • 如果 required_dataset=True,在 Server/Cloud 实例的 Web 界面创建评测任务的时候,需要让用户强制输入数据集;如果 required_dataset=False,则 Web 界面中不需要用户指定数据集。
      • 默认为 False

    使用示例

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune 是一个修饰器,定义模型训练的微调(fine-tune)过程。

    一些限制和使用建议:

    • fine_tune 只有一个副本。
    • fine_tune 需要有数据集输入。
    • 一般在 fine_tune 开始时,通过 Context.get_runtime_context() 获取数据集。
    • 一般在 fine_tune 结束时,通过 starwhale.model.build 生成微调后的 Starwhale 模型包,该模型包会被自动复制到评测对应的项目中。

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。

    使用示例

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context, dataset

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    @multi_classification 修饰器使用sklearn lib对多分类问题进行结果分析,输出confusion matrix, roc, auc等值,并且会写入到 starwhale DataStore 相关表中。 使用的时候需要对所修饰的函数返回值有一定要求,返回(label, result, probability_matrix)(label, result)

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    参数

    • confusion_matrix_normalize: (str, optional)
      • 接收三种参数:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • 是否计算hamming loss。
      • 默认为 True
    • show_cohen_kappa_score: (bool, optional)
      • 是否计算 cohen kappa score。
      • 默认为 True
    • show_roc_auc: (bool, optional)
      • 是否计算roc/auc, 计算的时候,需要函数返回(label,result, probability_matrix) 三元组,否则只需返回(label, result) 两元组即可。
      • 默认为 True
    • all_labels: (List, optional)
      • 定义所有的Labels。

    使用示例


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    PipelineHandler 是一个类,提供默认的模型评测过程定义,需要用户实现 predictevaluate 函数。

    PipelineHandler 等价于 @evaluation.predict + @evaluation.evaluate,展示使用方式不一样,背后的模型评测过程一致。

    用户需要实现如下函数:

    • predict: 定义推理过程,等价于 @evaluation.predict 修饰的函数。
    • evaluate: 定义评测过程,等价于 @evaluation.evaluate 修饰的函数。
    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    参数

    • predict_batch_size: (int, optional)
      • 等价于 @evaluation.predict 中的 batch_size 参数。
      • 默认值为1。
    • ignore_error: (bool, optional)
      • 等价于 @evaluation.predict 中的 fail_on_error 参数。
      • 默认值为 False
    • predict_auto_log: (bool, optional)
      • 等价于 @evaluation.predict 中的 auto_log 参数。
      • 默认值为 True
    • predict_log_mode: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_mode 参数。
      • 默认值为 pickle
    • predict_log_dataset_features: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_dataset_features 参数。
      • 默认值为空,即记录所有 features。

    PipelineHandler.run 修饰符

    PipelineHandler.run 修饰符可以对 predictevaluate 方法进行资源描述,支持 replicasresources 的定义:

    • PipelineHandler.run 只能修饰继承自 PipelineHandler 子类中的 predictevaluate方法。
    • predict 方法可以设置 replicas 参数。evaluate 方法的 replicas 值永远为1。
    • resources 参数与 @evaluation.predict@evaluation.evaluate 中的 resources 参数定义和使用方法保持一致。
    • PipelineHandler.run 修饰器是可选的。
    • PipelineHandler.run 仅在 Server 和 Cloud 实例中生效,Standalone 实例不支持资源定义。
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    使用示例

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    执行模型评测过程中传入的上下文信息,包括Project、Task ID等。Context 的内容是自动注入的,可以通过如下方式使用:

    • 继承 PipelineHandler 类内使用 self.context 对象。
    • 通过 Context.get_runtime_context() 获取。

    需要注意,只有在模型评测过程中,才能使用Context,否则程序会抛出异常。

    目前Context可以获得如下值:

    • project: str
      • Project 名字。
    • version: str
      • 模型评测的唯一ID。
    • step: str
      • Step 名字。
    • total: int
      • Step 下所有 Task 的数量。
    • index: int
      • Task 索引标号,下标从0开始。
    • dataset_uris: List[str]
      • Starwhale 数据集的URI 列表。

    使用示例


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api 是一个修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,当用户使用 swcli model serve 命令启动 Web Service 接收外部请求,并将推理结果返回给用户,实现在线评测。

    使用示例

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    如果希望自定义 web service 的实现, 可以继承 Service 并重写 serve 函数即可。

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    说明:

    • 使用 PipelineHandler.add_api 函数添加的 handler 和 api 以及实例化的 Service.api decorator 添加的 handler 可以同时生效
    • 如果使用自定义的 Service, 需要在 model 中实例化自定义的 Service 类

    自定义 Request 和 Response

    Request 和 Response 分别是用于接收用户请求和返回给用户结果的处理类, 可以简单的理解成是 handler 的前处理和后处理逻辑

    Starwhale 将支持 Dataset 内置类型的 Request 实现以及 Json Response 的实现, 同时用户可以自定义处理逻辑来使用, 自定义的示例如下:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req


    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/sdk/job/index.html b/zh/0.5.12/reference/sdk/job/index.html index 8734722c1..c03239b1f 100644 --- a/zh/0.5.12/reference/sdk/job/index.html +++ b/zh/0.5.12/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 任务 SDK

    job

    通过Job URI参数获取 starwhale.Job 对象,可以获得 Standalone/Server/Cloud 实例上的任务。

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    参数

    • uri: (str, required)
      • Job URI格式的字符串。

    使用示例

    from starwhale import job
    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")
    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job 实现对 Starwhale 任务的抽象,能够对 Standalone/Server/Cloud 实例上的任务进行一些信息获取类的操作。

    list

    list 是一个 classmethod 方法,能够列出某个项目下的任务。

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> t.Tuple[t.List[Job], t.Dict]:

    参数

    • project: (str, optional)
      • Project URI,Standalone/Server/Cloud 实例上的项目都可以。
      • 若不指定 project 参数,则使用 swcli project selected 命令选定的项目。
    • page_index: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定页面序号。
        • 默认值为 1。
        • 页面起始序号为 1。
      • Standalone 实例不支持翻页操作,设置该参数无效。
    • page_size: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定每页返回的任务数量。
        • 默认值为 1。
        • 页面起始序号为 1。
      • Standalone 实例不支持翻页操作,设置该参数无效。

    使用示例

    from starwhale import Job
    # list jobs of current selected project
    jobs, pagination_info = Job.list()
    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")
    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get 是一个 classmethod 方法,能够获得某个特定任务的信息,返回 Starwhale.Job 对象,与 starwhale.job 函数功能和参数定义上完全一致。

    使用示例

    from starwhale import Job
    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")
    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary 是一个 property 属性,返回任务运行中写入 summary 表中的数据,字典类型。

    @property
    def summary(self) -> t.Dict[str, t.Any]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.summary)

    tables

    tables 是一个 property 属性,返回任务运行中创建的表名(不包括 summary 表,因为 summary 表是项目级别自动创建的),列表类型。

    @property
    def tables(self) -> t.List[str]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.tables)

    get_table_rows

    get_table_rows 是一个 method 方法,可以根据表名等参数返回数据表的记录,迭代器类型。

    def get_table_rows(
    self,
    name: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator[t.Dict[str, t.Any]]:

    参数

    • name: (str, required)
      • datastore 表名。通过 tables 属性获得的表名,可以传给 name 参数。
    • start: (Any, optional)
      • 返回记录中,ID的起始值。
      • 默认值为 None,表示从头开始。
    • end: (Any, optional)
      • 返回记录中,ID的结束值。
      • 默认值为 None ,表示一直到表末尾。
      • startend 都为 None,则会以迭代器方式返回整个表的数据。
    • keep_none: (bool, optional)
      • 是否返回值为 None的记录。
      • 默认为 False。
    • end_inclusive: (bool, optional)
      • end 参数设置时,迭代记录的时候,是否包含end记录。
      • 默认为 False。

    使用示例

    from starwhale import job
    j = job("local/project/self/job/xm5wnup")
    table_name = j.tables[0]
    for row in j.get_table_rows(table_name):
    print(row)
    rows = list(j.get_table_rows(table_name, start=0, end=100))
    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/sdk/model/index.html b/zh/0.5.12/reference/sdk/model/index.html index cb2e28e78..1bdcf48f1 100644 --- a/zh/0.5.12/reference/sdk/model/index.html +++ b/zh/0.5.12/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 模型 SDK

    model.build

    model.build 是一个函数,能够构建 Starwhale 模型,等价于 swcli model build 命令。

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    参数

    • modules: (List[str|object], optional)
      • 构建时导入的模块,为列表类型,可以指定多个模块。
      • 模块类型包含两种:
        • 字符串类型: Python 可 Import 的路径,比如 "to.path.module", "to.path.module:object" 。
        • Python 对象: model.build 函数会自动解析所对应的模块。
      • 如果不指定,则会搜索当前已经导入的模块。
    • name: (str, optional)
      • Starwhale 模型的名称。
      • 若不指定,则会使用 cwd 目录名作为 Starwhale 模型的名称。
    • workdir: (str, Pathlib.Path, optional)
      • Starwhale 模型打包的根目录,此目录下的文件会被打包。
    • project_uri: (str, optional)
      • Project URI,表示该模型属于哪个项目。
      • 默认为 swcli project select 选择的项目。
    • desc: (str, optional)
      • 描述信息,默认为空。
    • remote_project_uri: (str, optional)
      • 其他示例的项目 URI,构建完Starwhale 模型后,会被自动复制到远端实例中。
    • add_all: (bool, optional)
      • Starwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
      • 默认为 False
    • tags: (List[str], optional)
      • 用户自定义标签。
      • 不能指定 latest 和 ^v\d+$ 这两个 Starwhale 系统内建标签。

    使用示例

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/sdk/other/index.html b/zh/0.5.12/reference/sdk/other/index.html index 9fccfc087..985844656 100644 --- a/zh/0.5.12/reference/sdk/other/index.html +++ b/zh/0.5.12/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    其他 SDK

    __version__

    Starwhale Python SDK 和 swcli 版本,是字符串常量。

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    init_logger 用来设置日志输出级别。默认为0

    • 0: 输出 errors 信息,traceback 呈现最近的1个堆栈。
    • 1: 输出 errors + warnings 信息,traceback 呈现最近的5个堆栈内容。
    • 2: 输出 errors + warnings + info 信息,traceback 呈现最多10个堆栈内容。
    • 3: 输出 errors + warnings + info + debug 信息,traceback 呈现最多100个堆栈内容。
    • >=4: 输出 errors + warnings + info + debug + trace 信息,traceback 呈现最多1000个堆栈内容。
    def init_logger(verbose: int = 0) -> None:

    login

    登录 server/cloud 实例,等价于 swcli instance login 命令。登录 Standalone 实例是无意义的。

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    参数

    • instance: (str, required)
      • server/cloud 实例的 http url。
    • alias: (str, optional)
      • 实例的别名,可以简化 Starwhale URI 中 instance部分。
      • 若不指定,则使用实例的 http url 中 hostname 部分。
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • username + password 或 token 只能选择一种方式登录实例。

    使用示例

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    登出 server/cloud 实例, 等价于 swcli instance logout 命令。登出 Standalone 实例是无意义的。

    def logout(instance: str) -> None:

    使用示例

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/sdk/overview/index.html b/zh/0.5.12/reference/sdk/overview/index.html index 7fdd30a6a..30ad4a647 100644 --- a/zh/0.5.12/reference/sdk/overview/index.html +++ b/zh/0.5.12/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Python SDK 概览

    Starwhale 提供一系列的 Python SDK,帮助用户管理数据集、模型和评测等调用,使用 Starwhale Python SDK 能让您更好的完成 ML/DL 开发任务。

    • class PipelineHandler: 提供默认的模型评测过程定义,需要用户实现 predict 和 evaluate 函数。
    • class Context: 执行模型评测过程中传入的上下文信息,包括 Project、Task ID 等。
    • class Dataset: Starwhale 数据集类。
    • class starwhale.api.service.Service: 在线评测的基础类。
    • class Job: 提供Job相关的操作。

    函数

    • @multi_classification: 修饰器,适用于多分类问题,用来简化 evaluate 结果的进一步计算和结果存储,能更好的呈现评测结果。
    • @handler: 修饰器,定义一个带有资源属性(mem/cpu/gpu)的运行实体,可以控制副本数。多个Handlers可以通过依赖关系,生成DAG,便于控制执行流程。
    • @evaluation.predict: 修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段。
    • @evaluation.evaluate: 修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段。
    • evaluation.log: 记录某些评测指标到特定表中。
    • evaluation.log_summary: 记录某些指标到 summary 表中。
    • evaluation.iter: 迭代读取某些表中的数据。
    • model.build: 进行 Starwhale 模型构建。
    • @fine_tune: 修饰器,定义模型训练的微调(fine-tune)过程。
    • init_logger: 设置日志输出级别,实现五种级别日志输出。
    • dataset: 获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。
    • @starwhale.api.service.api: 修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,实现在线评测。
    • login: 登录 server/cloud 实例。
    • logout: 登出 server/cloud 实例。
    • job: 根据Job URI获得 starwhale.Job 对象。
    • @PipelineHandler.run: 修饰器,定义 PipelineHandler 子类中 predict 和 evaluate 方法的资源。

    数据类型

    • COCOObjectAnnotation: 提供COCO类型的定义。
    • BoundingBox: 边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。
    • ClassLabel: 描述label的数量和类型。
    • Image: 图片类型。
    • GrayscaleImage: 灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。
    • Audio: 音频类型。
    • Video: 视频类型。
    • Text: 文本类型,默认为 utf-8 格式,用来存储大文本。
    • Binary: 二进制类型,用 bytes 存储,用来存储比较大的二进制内容。
    • Line: 直线类型。
    • Point: 点类型。
    • Polygon: 多边形类型。
    • Link: Link类型,用来制作 remote-link 类型的数据。
    • S3LinkAuth: 当数据存储在基于S3协议的对象存储上时,该类型负责描述授权、密钥信息。
    • MIMEType: 描述 Starwhale 支持的多媒体类型,用在 Image、Video 等类型的 mime_type 属性上,能更好的进行 Dataset Viewer。
    • LinkType: 描述 Starwhale 支持的remote-link类型,目前支持 LocalFS 和 S3 两种类型。

    其他

    • __version__: Starwhale Python SDK 和 swcli 版本,是字符串常量。

    进一步阅读建议

    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/sdk/type/index.html b/zh/0.5.12/reference/sdk/type/index.html index 5f6e33828..fee21cb48 100644 --- a/zh/0.5.12/reference/sdk/type/index.html +++ b/zh/0.5.12/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 数据类型 SDK

    COCOObjectAnnotation

    提供COCO类型的定义。

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    参数说明
    idobject id,一般为全局object的递增id
    image_idimage id,一般为图片id
    category_idcategory id,一般为目标检测中类别的id
    segmentation物体轮廓表示,Polygon(多边形的点)或RLE格式
    areaobject面积
    bbox表示bounding box,可以为BoundingBox类型或float的列表
    iscrowd0表示是一个单独的object,1表示两个没有分开的object

    使用示例

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    提供灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width和Height,channel默认为1
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage函数

    GrayscaleImage.to_types

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    提供边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    参数说明
    xleft_x的坐标
    ytop_y的坐标
    width图片的宽度
    height图片的高度

    ClassLabel

    描述label的数量和类型。

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    图片类型。

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width、Height和channel
    mime_typeMIMEType支持的类型
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image函数

    Image.to_types

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    视频类型。

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp视频的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    音频类型。

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp音频文件的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio函数

    Audio.to_types

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    文本类型,默认为 utf-8 格式。

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    参数说明
    contenttext内容
    encodingtext的编码格式

    使用示例

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text函数

    to_types

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    二进制类型,用bytes存储。

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    参数说明
    fp路径、IO对象或文件内容的bytes
    mime_typeMIMEType支持的类型

    Binary函数

    Binary.to_types

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link类型,用来制作 remote-link 类型的数据集。

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    参数说明
    uri原始数据的uri地址,目前支持localFS和S3两种协议
    authLink Auth信息
    offset数据相对uri指向的文件偏移量
    size数据大小
    data_typeLink指向的实际数据类型,目前支持 Binary, Image, Text, Audio 和 Video 类型

    Link函数

    Link.astype

    astype() -> Dict[str, t.Any]

    S3LinkAuth

    当数据存储在基于S3协议的对象存储上时,该类型负责描述授权、密钥信息。

    S3LinkAuth(
    name: str = "",
    access_key: str = "",
    secret: str = "",
    endpoint: str = "",
    region: str = "local",
    )
    参数说明
    nameAuth的名称
    access_keyS3连接中的access_key
    secretS3连接中的secret
    endpointS3连接中的endpoint地址
    regionbucket所在的S3 region,默认为local

    使用示例

    import struct
    import typing as t
    from pathlib import Path

    from starwhale import (
    Link,
    S3LinkAuth,
    GrayscaleImage,
    UserRawBuildExecutor,
    )
    class LinkRawDatasetProcessExecutor(UserRawBuildExecutor):
    _auth = S3LinkAuth(name="mnist", access_key="minioadmin", secret="minioadmin")
    _endpoint = "10.131.0.1:9000"
    _bucket = "users"

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "t10k-labels-idx1-ubyte").open("rb") as label_file:
    _, label_number = struct.unpack(">II", label_file.read(8))

    offset = 16
    image_size = 28 * 28

    uri = f"s3://{self._endpoint}/{self._bucket}/dataset/mnist/t10k-images-idx3-ubyte"
    for i in range(label_number):
    _data = Link(
    f"{uri}",
    self._auth,
    offset=offset,
    size=image_size,
    data_type=GrayscaleImage(display_name=f"{i}", shape=(28, 28, 1)),
    )
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield _data, {"label": _label}
    offset += image_size

    MIMEType

    描述Starwhale支持的多媒体类型,用Python Enum类型实现,用在 Image、Video 等类型的 mime_type 属性上,能更好的进行Dataset Viewer。

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    LinkType

    描述Starwhale支持的remote-link类型,用Python Enum类型实现,目前支持 LocalFS 和 S3 两种类型。

    class LinkType(Enum):
    LocalFS = "local_fs"
    S3 = "s3"
    UNDEFINED = "undefined"

    Line

    描述直线。

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    描述点。

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    描述多边形。

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/dataset/index.html b/zh/0.5.12/reference/swcli/dataset/index.html index 5cf90cc3a..e76dc880d 100644 --- a/zh/0.5.12/reference/swcli/dataset/index.html +++ b/zh/0.5.12/reference/swcli/dataset/index.html @@ -10,7 +10,7 @@ - + @@ -21,7 +21,7 @@ | --page | N | Integer | 1 | 起始页码,仅限Server和Cloud实例。 | | --size | N | Integer | 20 | 一页中的数据集数量,仅限Server和Cloud实例。 | | --filter-fl | N | String | | 仅显示符合条件的数据集。该选项可以在一个命令中被多次重复使用。 |

    过滤器类型说明范例
    nameKey-Value数据集名称前缀--filter name=mnist
    ownerKey-Value数据集所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli dataset recover

    swcli [全局选项] dataset recover [选项] <DATASET>

    dataset recover 恢复以前删除的Starwhale数据集或版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 数据集或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale数据集或版本会被强制覆盖。

    swcli dataset remove

    swcli [全局选项] dataset remove [选项] <DATASET>

    dataset remove 删除指定的 Starwhale 数据集或某个版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则删除指定数据集的所有版本。软删除的 Starwhale 数据集,可以通过 swcli dataset recover 命令进行恢复(未进行垃圾回收)。

    被删除的Starwhale数据集或版本可以通过 swcli dataset list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale数据集或版本。删除后不可恢复。

    swcli dataset summary

    swcli [全局选项] dataset summary <DATASET>

    显示数据集摘要信息。DATASET 是一个数据集URI

    swcli dataset tag

    swcli [全局选项] dataset tag [选项] <DATASET> [TAGS]...

    dataset tag 将标签附加到指定的Starwhale数据集版本,同时支持删除和列出所有标签的功能。可以在数据集URI中使用标签替代版本ID。

    DATASET是一个数据集URI

    每个数据集版本可以包含任意数量的标签,但同一数据集中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的数据集已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    数据集标签的例子

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/index.html b/zh/0.5.12/reference/swcli/index.html index f9b8a8a85..9398c4855 100644 --- a/zh/0.5.12/reference/swcli/index.html +++ b/zh/0.5.12/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    概述

    使用方式

    swcli [选项] <COMMAND> [参数]...
    备注

    swcli、sw 和 starwhale 三个命令的作用是一样的。

    全局选项

    选项说明
    --version显示swcli的版本信息。
    --verbose-v日志中输出更多信息,当 -v 参数越多,呈现信息越多,最多支持4个 -v 参数。
    --help输出命令帮助信息。
    警告

    需要注意的是,全局参数需要跟在swcli之后,命令之前。

    命令

    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/instance/index.html b/zh/0.5.12/reference/swcli/instance/index.html index 563c280f0..68bd86a29 100644 --- a/zh/0.5.12/reference/swcli/instance/index.html +++ b/zh/0.5.12/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    swcli instance

    概述

    swcli [全局选项] instance [选项] <SUBCOMMAND> [参数]

    instance命令包括以下子命令:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [全局选项] instance info [选项] <INSTANCE>

    instance info 输出指定 Starwhale 实例的详细信息。

    INSTANCE 是一个实例URI

    swcli instance list

    swcli [全局选项] instance list [选项]

    instance list 显示所有的 Starwhale 实例。

    swcli instance login

    swcli [全局选项] instance login [选项] <INSTANCE>

    instance login 连接到一个 Server/Cloud 实例并将它设置为默认实例.

    INSTANCE 是一个实例URI

    选项必填项类型默认值说明
    --usernameNString登录用户名
    --passwordNString登录密码
    --tokenNString登录令牌
    --aliasYString实例别名。您可以在任何需要实例URI的地方使用对应的别名替代。

    --username 和 --password 不能和 --token 一起使用。

    swcli instance logout

    swcli [全局选项] instance logout [INSTANCE]

    instance logout 断开和 Server/Cloud 实例的连接并清除本地保存的信息。

    INSTANCE是一个实例URI。如果不指定,将使用默认实例

    swcli instance use

    swcli [全局选项] instance use <INSTANCE>

    instance use 将指定的实例设置为默认实例.

    INSTANCE 是一个实例URI

    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/job/index.html b/zh/0.5.12/reference/swcli/job/index.html index ce2b6c25f..e4615e9be 100644 --- a/zh/0.5.12/reference/swcli/job/index.html +++ b/zh/0.5.12/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    swcli job

    概述

    swcli [全局选项] job [选项] <子命令> [参数]...

    job命令包括以下子命令:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [全局选项] job cancel [选项] <JOB>

    job cancel 停止指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job info

    swcli [全局选项] job info [选项] <JOB>

    job info 输出指定作业的详细信息。

    JOB 是一个作业URI

    swcli job list

    swcli [全局选项] job list [选项]

    job list显示所有的 Starwhale 作业。

    选项必填项类型默认值说明
    --projectNString要查看的项目的 URI。如果未指定此选项,则使用默认项目替代。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的作业。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的作业数。仅限 Server 和 Cloud 实例。

    swcli job pause

    swcli [全局选项] job pause [选项] <JOB>

    job pause 暂停指定的作业. 被暂停的作业可以使用 job resume 恢复。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    pausecancel 功能上基本相同。它们的差别在于被暂停的作业会保留作业ID,在恢复时继续使用。作业的开发者需要定期保存作业数据并在恢复的时候重新加载相关数据。作业ID 可以用作保存数据的键值。

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job resume

    swcli [全局选项] job resume [选项] <JOB>

    job resume 恢复指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/model/index.html b/zh/0.5.12/reference/swcli/model/index.html index 766520ba8..23f7f092c 100644 --- a/zh/0.5.12/reference/swcli/model/index.html +++ b/zh/0.5.12/reference/swcli/model/index.html @@ -10,14 +10,14 @@ - +
    版本:0.5.12

    swcli model

    概述

    swcli [全局选项] model [选项] <SUBCOMMAND> [参数]...

    model命令包括以下子命令:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [全局选项] model build [选项] <WORKDIR>

    model build 会将整个 WORKDIR 打包到Starwhale模型中,.swignore匹配的文件除外。

    model build 会导入 --module 参数指定的模块,然后生成运行模型所需要的配置。如果您指定的模块依赖第三方库,我们强烈建议您使用 --runtime 选项。如果不指定该选项,您需要确保 swcli 所使用的 Python 环境已经安装了相关的依赖库。

    选项必填项类型默认值说明
    --project-pNString默认项目项目URI
    --model-yaml-fNString${workdir}/model.yamlmodel.yaml 文件路径,默认会尝试使用 ${workdir}/model.yaml 文件。model.yaml 对于模型构建并非必需的。
    --module-mNString构建时导入的模块。Starwhale 会将这些模块中包含的 handler 导出到模型包。该参数可以指定多次,用来导入多个 Python 模块。
    --runtimeNString运行此命令时使用的 Starwhale Runtime的URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --name-nNString模型包的名字
    --desc-dNString模型包的描述
    --package-runtime--no-package-runtimeNBooleanTrue当使用 --runtime 参数时,默认情况下,会将对应的 Starwhale 运行时变成 Starwhale 模型的内置运行时。可以通过 --no-package-runtime 参数禁用该特性。
    --add-allNBooleanFalseStarwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
    -t--tagN全局String

    Starwhale 模型构建的例子

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [全局选项] model copy [选项] <SRC> <DEST>

    model copy 将模型从 SRC 复制到 DEST,用来实现不同实例的模型分享。这里 SRCDEST 都是模型URI

    Starwhale 模型复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest 和 ^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 模型复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [全局选项] model diff [选项] <MODEL VERSION> <MODEL VERSION>

    model diff 比较同一模型的两个版本之间的差异。

    MODEL VERSION 是一个模型URI

    选项必填项类型默认值说明
    --show-detailsNBooleanFalse使用该选项输出详细的差异信息。

    swcli model extract

    swcli [全局选项] model extract [选项] <MODEL> <TARGET_DIR>

    model extract 能够对将Starwhale 模型解压到指定目录中,方便进行后续改造。

    MODEL 是一个模型URI

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,会强制覆盖目标目录已经存在的模型解压文件。

    Starwhale 模型解压的例子

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [全局选项] model history [选项] <MODEL>

    model history输出指定Starwhale模型的所有历史版本。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli model info

    swcli [全局选项] model info [选项] <MODEL>

    model info输出指定Starwhale模型版本的详细信息。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basic设置输出的过滤规则,比如只显示Model的model.yaml。目前该参数仅对Standalone Instance的Model生效。

    Starwhale 模型信息查看的例子

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [全局选项] model list [选项]

    model list显示所有的Starwhale模型。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的模型。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的模型数。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的模型。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value模型名称前缀--filter name=mnist
    ownerKey-Value模型所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli model recover

    swcli [全局选项] model recover [选项] <MODEL>

    model recover 恢复以前删除的 Starwhale 模型或版本。

    MODEL是一个模型URI。如果 URI 不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 模型或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的 Starwhale 模型或版本会被强制覆盖。

    swcli model remove

    swcli [全局选项] model remove [选项] <MODEL>

    model remove 删除指定的 Starwhale 模型或某个版本。

    MODEL 是一个模型URI。如果URI不包含版本,则删除指定模型的所有版本。

    被删除的 Starwhale 模型或版本可以在垃圾回收之前通过 swcli model recover 恢复。要永久删除某个Starwhale模型或版本,您可以使用 --force 选项。

    被删除的 Starwhale 模型或版本可以通过 swcli model list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale模型或版本。删除后不可恢复。

    swcli model run

    swcli [全局选项] model run [选项]

    model run 运行一个模型的 Handler。该命令提供两种模式: model URI模式和本地开发模式。 model URI模式需要一个预先构建好的模型包,本地开发模式仅需要 model 代码目录即可。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在model URI模式中使用,指定 model URI。
    --handler-hNString运行的Handler索引或名字,默认运行第一个Handler。格式为序号或Handler的名字。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model run 是非必须的。
    --run-project-pNString默认的 ProjectProject URI,表示 model run 的结果存储到对应的项目中。
    --dataset-dNStringDataset URI,模型运行所需要的 Starwhale 数据集。该参数可以被设置多次。
    --in-containerNBooleanFalse使用docker镜像来运行模型。此选项仅适用于 Standalone 实例。Server 和 Cloud 实例始终使用 docker 镜像。如果指定的 runtime 是基于 docker 镜像构建的,此选项总是为真。
    --forbid-snapshot-fsNBooleanFalse当在model URI模式下,每次模型运行,都会使用一个全新的快照目录,设置该参数后直接使用模型的 workdir 目录作为运行目录。本地开发模式下,此参数不生效,每次运行都是在 --workdir 指定的目录中。
    -- --user-arbitrary-argsNString你在handlers中预设的参数 赋值.

    Starwhale 模型运行的例子

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    swcli model serve

    swcli [全局选项] model serve [选项]

    model serve 命令可以以Web Server方式运行模型,并提供简易的 Web 交互界面。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在 model URI模式中使用,指定 model URI。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model serve 是非必须的。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --hostNString127.0.0.1服务监听的地址
    --portNInteger8080服务监听的端口

    Starwhale 模型 Serving 的例子

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [全局选项] model tag [选项] <MODEL> [TAGS]...

    model tag将标签附加到指定的Starwhale模型版本,同时支持删除和列出所有标签的功能。可以在模型URI中使用标签替代版本ID。

    MODEL是一个模型URI

    每个模型版本可以包含任意数量的标签,但同一模型中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的模型已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 模型标签的例子

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/project/index.html b/zh/0.5.12/reference/swcli/project/index.html index 2a7041043..56a858a6d 100644 --- a/zh/0.5.12/reference/swcli/project/index.html +++ b/zh/0.5.12/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    swcli project

    Overview

    swcli [全局选项] project [选项] <子命令> [参数]...

    project命令包括以下子命令:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [全局选项] project create <PROJECT>

    project create 创建一个新的项目。

    PROJECT 是一个项目URI

    swcli project info

    swcli [全局选项] project info [选项] <PROJECT>

    project info 输出指定项目的详细信息。

    PROJECT 是一个项目URI

    swcli project list

    swcli [全局选项] project list [选项]

    project list 显示所有的项目。

    选项必填项类型默认值说明
    --instanceNString要显示的实例 URI。如果不指定该选项,则显示默认实例.
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的项目。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的项目数。仅限 Server 和 Cloud 实例。

    swcli project recover

    swcli [全局选项] project recover [选项] <PROJECT>

    project recover 恢复以前删除的项目。

    PROJECT 是一个项目URI

    已经被垃圾回收或者使用 --force 选项删除的项目无法使用本命令恢复。

    swcli project remove

    swcli [全局选项] project remove [选项] <PROJECT>

    project remove 删除指定的项目。

    PROJECT 是一个项目URI

    被删除的项目可以在垃圾回收之前通过 swcli project recover 恢复。要永久删除某个项目,您可以使用 --force 选项。

    被删除的项目可以通过 swcli project list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个项目。删除后不可恢复。

    swcli project use

    swcli [全局选项] project use <PROJECT>

    project use 将指定的项目设置为默认项目。如果要指定 Server/Cloud 实例上的项目,您需要先登录才能运行本命令。

    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/runtime/index.html b/zh/0.5.12/reference/swcli/runtime/index.html index 9ca098b21..25a7bf1d1 100644 --- a/zh/0.5.12/reference/swcli/runtime/index.html +++ b/zh/0.5.12/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    swcli runtime

    概述

    swcli [全局选项] runtime [选项] <SUBCOMMAND> [参数]...

    runtime 命令包括以下子命令:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [全局选项] runtime activate [选项] <RUNTIME>

    runtime activate 根据指定的运行时创建一个全新的 Python 环境,类似 source venv/bin/activate 或 conda activate xxx 的效果。关闭当前 shell 或切换到其他 shell 后,需要重新激活 Runtime。URI 参数为 Runtime URI。

    对于已经激活的 Starwhale 运行时,如果想要退出该环境,需要在 venv 环境中执行 deactivate 命令或conda环境中执行 conda deactivate 命令。

    runtime activate 命令首次激活环境的时候,会根据 Starwhale 运行时的定义,构建一个 Python 隔离环境,并下载相关的 Python Packages,可能会花费比较长的时间。

    swcli runtime build

    swcli [全局选项] runtime build [选项]

    runtime build 命令可以从多种环境或 runtime.yaml ,构建一个可以分享、可以复现的适合 ML/DL 领域的运行环境。

    参数说明

    • 运行时构建方式的相关参数:
    选项必填项类型默认值说明
    -c--condaNString通过 conda env name 寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -cp--conda-prefixNString通过 conda env prefix 路径寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -v--venvNString通过 venv 目录地址寻找对应的 venv 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -s--shellNString根据当前 shell 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -y--yamlNcwd 目录的 runtime.yaml根据用户自定义的 runtime.yaml 构建 Starwhale 运行时。
    -d--dockerNString将 docker image 作为 Starwhale 运行时。

    运行时构建方式的相关参数是互斥的,只能指定一种方式,如果不指定,则会采用 --yaml 方式读取 cwd 目录下的 runtime.yaml 文件进行 Starwhale 运行时的构建。

    • 其他参数:
    选项必填项作用域类型默认值说明
    --project-pN全局String默认项目项目URI
    -del--disable-env-lockNruntime.yaml 模式BooleanFalse是否安装 runtime.yaml 中的依赖,并锁定相关依赖的版本信息。默认会锁定依赖。
    -nc--no-cacheNruntime.yaml 模式BooleanFalse是否删除隔离环境,全新安装相关依赖。默认会在之前的隔离环境中安装依赖。
    --cudaNconda/venv/shell 模式Choice[11.3/11.4/11.5/11.6/11.7/]CUDA 版本,默认不使用 CUDA。
    --cudnnNconda/venv/shell 模式Choice[8/]cuDNN 版本,默认不使用 cuDNN。
    --archNconda/venv/shell 模式Choice[amd64/arm64/noarch]noarch体系结构
    -dpo--dump-pip-optionsN全局BooleanFalse从 ~/.pip/pip.conf 导出 pip 的配置参数。
    -dcc--dump-condarcN全局BooleanFalse从 ~/.condarc 导出 conda 的配置参数。
    -t--tagN全局String用户自定义标签,可以指定多次。

    Starwhale 运行时构建的例子

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [全局选项] runtime copy [选项] <SRC> <DEST>

    runtime copy 将 runtime 从 SRC 复制到 DEST,可以实现不同实例之间的运行时分享。这里 SRCDEST 都是运行时URI

    Starwhale 运行时复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest 和 ^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 运行时复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [全局选项] runtime dockerize [选项] <RUNTIME>

    runtime dockerize 基于指定的 runtime 创建一个 docker 镜像。Starwhale 使用 docker buildx 来创建镜像。运行此命令需要预先安装 Docker 19.03 以上的版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --tag or -tNStringDocker镜像的tag,该选项可以重复多次。
    --pushNBooleanFalse是否将创建的镜像推送到docker registry。
    --platformNStringamd64镜像的运行平台,可以是amd64或者arm64。该选项可以重复多次用于创建多平台镜像。
    --dry-runNBooleanFalse只生成 Dockerfile 不实际生成和推送镜像。

    swcli runtime extract

    swcli [全局选项] runtime extract [选项] <RUNTIME>

    Starwhale 运行时以压缩包的方式分发,使用 runtime extract 命令可以解压运行时 Package,然后进行后续的自定义修改。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果目标目录已经有解压好的 Starwhale 运行时,是否删除后重新解压。
    --target-dirNString自定义解压的目录,如果不指定则会放到 Starwhale 默认的运行时 workdir 目录中,命令输出日志中会提示。

    swcli runtime history

    swcli [全局选项] runtime history [选项] <RUNTIME>

    runtime history输出指定Starwhale运行时的所有历史版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli runtime info

    swcli [全局选项] runtime info [选项] <RUNTIME>

    runtime info输出指定Starwhale运行时版本的详细信息。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/runtime_yaml/manifest/lock/all]basic设置输出的过滤规则,比如只显示Runtime的runtime.yaml。目前该参数仅对Standalone Instance的Runtime生效。

    Starwhale 运行时查看详情的例子

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [全局选项] runtime list [选项]

    runtime list显示所有的 Starwhale 运行时。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的运行时。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的运行时数量。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的运行时。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value运行时名称前缀--filter name=pytorch
    ownerKey-Value运行时所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli runtime recover

    swcli [全局选项] runtime recover [选项] <RUNTIME>

    runtime recover 命令可以恢复以前删除的 Starwhale 运行时。

    RUNTIME是一个运行时URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 运行时或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale运行时或版本会被强制覆盖。

    swcli runtime remove

    swcli [全局选项] runtime remove [选项] <RUNTIME>

    runtime remove 命令可以删除指定的 Starwhale 运行时或某个版本。

    RUNTIME 是一个运行时URI。如果 URI 不包含版本,则删除所有版本。

    被删除的 Starwhale 运行时或版本可以在垃圾回收之前通过 swcli runtime recover 命令恢复。要永久删除某个 Starwhale 运行时或版本,您可以使用 --force 选项。

    被删除的 Starwhale 运行时或版本可以通过 swcli runtime list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 运行时或版本。删除后不可恢复。

    swcli runtime tag

    swcli [全局选项] runtime tag [选项] <RUNTIME> [TAGS]...

    runtime tag 命令将标签附加到指定的 Starwhale 运行时版本,同时支持删除和列出所有标签的功能。可以在运行时URI中使用标签替代版本 ID。

    RUNTIME 是一个运行时URI

    每个运行时版本可以包含任意数量的标签,但同一运行时中不允许有重复的标签名称。

    runtime tag仅适用于 Standalone 实例.

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的运行时已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 运行时标签的例子

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/zh/0.5.12/reference/swcli/utilities/index.html b/zh/0.5.12/reference/swcli/utilities/index.html index cefed7873..dd5f30f91 100644 --- a/zh/0.5.12/reference/swcli/utilities/index.html +++ b/zh/0.5.12/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    其他命令

    swcli gc

    swcli [全局选项] gc [选项]

    gc根据内部的垃圾回收策略清理已经被删除的项目、模型、数据集和运行时。

    选项必填项类型默认值说明
    --dry-runNBooleanFalse如果为真,仅输出将被删除的对象而不清理。
    --yesNBooleanFalse跳过所有需要确认的项目。

    swcli check

    swcli [全局选项] check

    检查 swcli 命令的外部依赖是否满足条件,目前主要检查 Docker 和 Conda。

    swcli completion install

    swcli [全局选项] completion install <SHELL_NAME>

    安装 swcli 命令补全,目前支持 bash, zsh 和 fish。如果不指定 SHELL_NAME,则尝试主动探测当前shell类型。

    swcli config edit

    swcli [全局选项] config edit

    编辑 Starwhale 配置文件,即 ~/.config/starwhale/config.yaml

    swcli ui

    swcli [全局选项] ui <INSTANCE>

    打开对应实例的Web页面。

    - + \ No newline at end of file diff --git a/zh/0.5.12/runtime/index.html b/zh/0.5.12/runtime/index.html index 159244373..08afef977 100644 --- a/zh/0.5.12/runtime/index.html +++ b/zh/0.5.12/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 运行时

    概览

    Starwhale 运行时能够为运行 Python 程序提供一种可复现、可分享的运行环境。使用 Starwhale 运行时,可以非常容易的与他人分享,并且能在 Starwhale Server 和 Starwhale Cloud 实例上使用 Starwhale 运行时。

    Starwhale 运行时使用 venv, conda 和 docker 等基础技术,如果您当前正在使用这些技术,可以非常容易的将这个环境转化为 Starwhale 运行时。

    对于本地环境,Starwhale 运行时支持非常容易的多种环境管理和切换。Starwhale 运行时包含基础镜像和环境依赖两个部分。

    基础镜像

    Starwhale 基础镜像中会安装 Python, CUDA, cuDNN 和其他一些机器学习开发中必要的基础库。Starwhale 运行时提供多种基础镜像供选择,列表如下:

    • 体系结构:
      • X86 (amd64)
      • Arm (aarch64)
    • 操作系统:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7

    runtime.yaml 通过相关设置来决定使用何种基础镜像。

    - + \ No newline at end of file diff --git a/zh/0.5.12/runtime/yaml/index.html b/zh/0.5.12/runtime/yaml/index.html index 728bf9e17..2752aba2b 100644 --- a/zh/0.5.12/runtime/yaml/index.html +++ b/zh/0.5.12/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    runtime.yaml 使用指南

    runtime.yaml 是构建 Starwhale 运行时的描述文件,用户可以细粒度的定义 Starwhale 运行时的各种属性。当使用 swcli runtime build 命令中 yaml 模式时,需要提供 runtime.yaml 文件。

    使用示例

    最简示例

    dependencies:
    - pip:
    - numpy
    name: simple-test

    定义一个以 venv 作为Python 包隔离方式,安装numpy依赖的 Starwhale 运行时。

    llama2 示例

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    完整字段示例

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be install with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/zh/0.5.12/server/guides/server_admin/index.html b/zh/0.5.12/server/guides/server_admin/index.html index d5e6412e4..667acfcba 100644 --- a/zh/0.5.12/server/guides/server_admin/index.html +++ b/zh/0.5.12/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    版本:0.5.12

    Starwhale Server 系统设置

    超级管理员密码重置

    一旦您忘记了超级管理员的密码, 您可以通过下面的SQL语句将密码重置为 abcd1234

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    重置后,您可以使用上述密码登录到console。 然后再次修改密码为您想要的密码。

    系统设置

    您可以在 Starwhale Server Web 界面中对系统设置进行更改,目前支持runtime的docker镜像源修改以及资源池的划分等。下面是系统设置的一个例子:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    镜像源设置

    Server 下发的 Tasks 都是基于 docker 实现的,Starwhale Server 支持自定义镜像源,包括 dockerSetting.registryForPush 和 dockerSetting.registryForPull。

    资源池设置

    资源池实现了集群机器分组的功能。用户在创建任务时可以通过选择资源池将自己的任务下发到想要的机器组中。资源池可以理解为 Kubernetes 中的 nodeSelector,所以当您在K8S集群中给机器打上标签后,就可以在这里配置您的 resourcePool

    存储设置

    您可以通过存储设置来配置 Starwhale Server 可以访问哪些存储介质:

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload

    每一个 storageSetting 条目都应该有一个StorageAccessService接口的实现. Starwhale目前有四个内置的实现:

    • StorageAccessServiceAliyun 可以处理 type 为 aliyun 或者 oss 的条目
    • StorageAccessServiceMinio 可以处理 type 为 minio 的条目
    • StorageAccessServiceS3 可以处理 type 为 s3 的条目
    • StorageAccessServiceFile 可以处理 type 为 fs 或者 file 的条目

    不同的实现对 tokens 的要求是不一样的。当 type 为 aliyun、minio 或者 oss 的时候,endpoint 是必填的;当 endpoint 为空并且 type 为 s3 的时候,region 是必填的;而 fs/file 类型的存储则需要 rootDir 和 serviceProvider 作为 tokens 的 key。更多细节请参阅代码。

    - + \ No newline at end of file diff --git a/zh/0.5.12/server/index.html b/zh/0.5.12/server/index.html index bcbd92f68..71c07290f 100644 --- a/zh/0.5.12/server/index.html +++ b/zh/0.5.12/server/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/0.5.12/server/installation/docker-compose/index.html b/zh/0.5.12/server/installation/docker-compose/index.html index f8b980b22..03d51ce76 100644 --- a/zh/0.5.12/server/installation/docker-compose/index.html +++ b/zh/0.5.12/server/installation/docker-compose/index.html @@ -10,14 +10,14 @@ - +
    版本:0.5.12

    使用Docker Compose安装Starwhale

    先决条件

    安装方法

    启动服务

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    GLOBAL_IP 需要是可以被所有 swcli 访问到的,包括用户实际使用的swcli和container内部的swcli. 如果不能访问,请确认您的防火墙设置.

    compose.yaml 包含了Mysql数据库,MinIO存储和Controller服务. 创建一个 compose.override.yaml, 可以覆盖 compose.yaml 中的配置. 如何配置可以参考此处

    - + \ No newline at end of file diff --git a/zh/0.5.12/server/installation/docker/index.html b/zh/0.5.12/server/installation/docker/index.html index 7984b756e..4c3cb5fc7 100644 --- a/zh/0.5.12/server/installation/docker/index.html +++ b/zh/0.5.12/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    使用 Docker 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。

    请确保您的Kubernetes集群上的pod可以访问Starwhale Server侦听的端口。

    为Docker准备env文件

    Starwhale Server可以通过环境变量进行配置。

    Docker的env文件模板参考此处。您可以通过修改模板来创建自己的env文件。

    准备kubeconfig文件[可选][SW_SCHEDULER=k8s]

    kubeconfig文件用于访问Kubernetes集群。 有关kubeconfig文件的更多信息,请参阅官方Kubernetes文档

    如果您安装了kubectl命令行工具,可以运行 kubectl config view 来查看您当前的配置。

    启动Docker镜像

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<您的kubeconfig文件路径>,destination=/root/.kube/config,readonly \
    --env-file <您的env文件路径> \
    docker-registry.starwhale.cn/star-whale/server:0.5.6

    对于非中国大陆网络用户,可以使用托管在 ghcr.io 上的镜像: ghcr.io/star-whale/server

    - + \ No newline at end of file diff --git a/zh/0.5.12/server/installation/helm-charts/index.html b/zh/0.5.12/server/installation/helm-charts/index.html index 54cca5977..ff6c9cb4d 100644 --- a/zh/0.5.12/server/installation/helm-charts/index.html +++ b/zh/0.5.12/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    使用 Helm 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。
    • Helm 3.2.0+。

    Starwhale Helm charts 包括 MySQL 和 MinIO 作为依赖项。如果您没有自己的 MySQL 实例或任何与 AWS S3 兼容的对象存储可用,可以通过 Helm Chats 进行安装。请查看下文的安装选项以了解如何在安装 Starwhale Server 的同时安装 MySQL 和 MinIO。

    在 Kubernetes 上为 Starwhale Server 创建一个服务账号

    如果您的 Kubernetes 集群启用了 RBAC(在 Kubernetes 1.6+中,默认启用 RBAC),Starwhale Server 将无法正常工作,除非由至少具有以下权限的服务帐户启动:

    Resource   API Group   Get   List   Watch   Create   Delete
    jobs       batch       Y     Y      Y       Y        Y
    pods       core        Y     Y      Y
    nodes      core        Y     Y      Y
    events     ""          Y     Y      Y

    例子:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    下载 Starwhale Helm chart

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    安装Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    如果您安装了kubectl命令行工具,您可以运行 kubectl get pods -n starwhale 来检查是否所有 pod 都在正常运行中。

    更新 Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    卸载 Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/zh/0.5.12/server/installation/index.html b/zh/0.5.12/server/installation/index.html index 7576b3026..6bb462728 100644 --- a/zh/0.5.12/server/installation/index.html +++ b/zh/0.5.12/server/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale Server 安装指南

    Starwhale Server 以 Docker 镜像的形式发布。您可以直接使用 Docker 运行,也可以部署到 Kubernetes 集群上。

    - + \ No newline at end of file diff --git a/zh/0.5.12/server/installation/minikube/index.html b/zh/0.5.12/server/installation/minikube/index.html index 5c8677794..c58ae71f4 100644 --- a/zh/0.5.12/server/installation/minikube/index.html +++ b/zh/0.5.12/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    使用 Minikube 安装 Starwhale Server

    先决条件

    启动 Minikube

    minikube start --addons ingress --image-mirror-country=cn --kubernetes-version=1.25.3

    对于非中国大陆网络用户,可以省略 --image-mirror-country=cn 参数。另外,如果在您的机器上没有安装 kubectl,可以使用 Minikube 自带的 kubectl: minikube kubectl 或 bashrc中增加 alias kubectl="minikube kubectl --"

    安装 Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    对于非中国大陆网络用户,可以使用 values.minikube.global.yaml,命令如下:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    当成功安装后,会有类似如下的提示信息输出:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    检查 Starwhale Server 状态

    Minikube 方式启动 Starwhale Server 一般要用时3-5分钟,可以输出如下命令检查是否完成启动:

    kubectl get deployments -n starwhale
    NAME         READY   UP-TO-DATE   AVAILABLE   AGE
    controller   1/1     1            1           5m
    minio        1/1     1            1           5m
    mysql        1/1     1            1           5m

    本机访问的网络配置

    输出如下命令后,就可以在浏览器中通过 http://controller.starwhale.svc 访问 Starwhale Server:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    其他机器访问的网络配置

    • 步骤1: 在 Starwhale Server 所在机器上

      使用 socat 命令做临时的端口转发,命令如下:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      当您停掉socat进程后,端口转发会被禁止,其他机器的访问也会被禁止。如果想长期开启端口转发,可以使用 iptables 命令。

    • 步骤2: 在其他机器上

      在 hosts 文件添加相关域名映射,命令如下:

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/zh/0.5.12/server/installation/starwhale_env/index.html b/zh/0.5.12/server/installation/starwhale_env/index.html index 5aedc3521..e6c6cfeae 100644 --- a/zh/0.5.12/server/installation/starwhale_env/index.html +++ b/zh/0.5.12/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 环境变量文件示例

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################
    - + \ No newline at end of file diff --git a/zh/0.5.12/server/project/index.html b/zh/0.5.12/server/project/index.html index 49d9e479d..141b38bf3 100644 --- a/zh/0.5.12/server/project/index.html +++ b/zh/0.5.12/server/project/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.5.12

    Project Management

    Project type

    There are two types of projects:

    • Public: Visible to anyone. Everyone on the internet can find and see public projects.

    • Private: Visible to users specified in the project member settings. Private projects can only be seen by project owners and project members. The project owner can manage access in the project setting of Manage Member.

    Create a project

    1 Sign in to Starwhale, click Create Project.

    creat

    2 Type a name for the project.

    image

    提示

    Avoid duplicate project names. For more information, see Names in Starwhale

    3 Select project visibility to decide who can find and see the project.

    image

    4 Type a description. It is optional.

    image

    5 To finish, click Submit.

    image

    Edit a project

    The name, privacy and description of a project can be edited.

    提示

    Users with the project owner or maintainer role can edit a project. For more information, see Roles and permissions

    Edit name

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter a new name for the project.

      image

      提示

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

      4 If you're editing multiple projects, repeat steps 1 through 3.

    • If you are on a specific project:

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter a new name for the project.

      image

      提示

      Avoid duplicate project names. For more information, see Names in Starwhale

      3 Click Submit to save changes.

      image

    Edit privacy

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Click the Public or Private by your command. For more information, see Project types.

      image

      3 Click Submit to save changes.

      image

    Edit description

    • If you are on the project list page:

      1 Hover your mouse over the project you want to edit, then click the Edit button.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    • If you are on a specific project

      1 Select Overview on the left navigation, and click Edit.

      image

      2 Enter any description you want to describe the project.

      image

      3 Click Submit to save changes.

      image

    Delete a project

    1 Hover your mouse over the project you want to delete, then click the Delete button.

    image

    2 If you are sure to delete, type the exact name of the project and then click Confirm to delete the project.

    image

    Important: When you delete a project, all the models, datasets, evaluations and runtimes belonging to the project will also be deleted and can not be restored. Be careful about the action.

    Manage project member

    Only users with the admin role can assign people to the project. The project owner defaulted to having the project owner role.

    Add a member to the project

    1 On the project list page or overview tab, click the Manage Member button, then Add Member.

    image

    image

    2 Type the username you want to add to the project, then click a name in the list of matches.

    image

    3 Select a project role for the member from the drop-down menu. For more information, see Roles and permissions

    image

    4 To finish, click Submit.

    image

    Remove a member

    1 On the project list page or project overview tab, click the Manage Member button.

    image

    2 Find the username you want to remove in the search box, click Remove, then Yes.

    image

    - + \ No newline at end of file diff --git a/zh/0.5.12/swcli/config/index.html b/zh/0.5.12/swcli/config/index.html index 49811da4c..a394c735d 100644 --- a/zh/0.5.12/swcli/config/index.html +++ b/zh/0.5.12/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    配置文件

    Standalone Instance 是安装在用户的笔记本或开发服务器上,以Linux/Mac用户为粒度进行隔离。用户通过 pip 命令安装 Starwhale Python package 并执行任意 swcli 命令后,就可以在 ~/.config/starwhale/config.yaml 中查看该用户的 Starwhale 配置。绝大多数情况下用户不需要手工修改config.yaml文件。

    ~/.config/starwhale/config.yaml 文件权限为 0o600,由于里面存有密钥信息,不建议用户修改该文件权限。您可以通过swcli config edit来修改配置:

    swcli config edit

    config.yaml 例子

    典型的 config.yaml 文件内容如下:

    • 当前默认 Instance 为 local。
    • cloud-cn/cloud-k8s/pre-k8s 三个为 Cloud Instance,local 为 Standalone Instance。
    • Standalone 本地存储的根目录为 /home/liutianwei/.starwhale
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml 字段说明

    参数说明类型默认值是否必须
    current_instance默认使用的instance名字,一般用 swcli instance select 命令设置Stringself
    instances管理的 Instances,包括 Standalone, Server 和 Cloud Instance,至少会有 Standalone Instance(名称为local),Server/Cloud Instance有一个或多个,一般用 swcli instance login 登陆一个新的instance,swcli instance logout 退出一个instanceDictStandalone Instance,名称为local
    instances.{instance-alias-name}.sw_token登陆Token,只对Server/Cloud Instance生效,后续swcli对Server/Cloud Instance进行操作时都会使用该Token。需要注意Token有过期时间,默认1个月,可以在Server/Cloud Instance中进行设置StringCloud-是,Standalone-否
    instances.{instance-alias-name}.typeinstance类型,目前只能填写 cloud 或 standaloneChoice[String]
    instances.{instance-alias-name}.uri对于Server/Cloud Instance,uri是http/https地址,对于Standalone Instance,uri是 localString
    instances.{instance-alias-name}.user_name用户名String
    instances.{instance-alias-name}.current_project当前Instance下默认的Project是什么,在URI的表述中会作为project字段进行默认填充,可以通过 swcli project select 命令进行设置String
    instances.{instance-alias-name}.user_role用户角色Stringnormal
    instances.{instance-alias-name}.updated_at该条Instance配置更新时间时间格式字符串
    storage与本地存储相关的设置Dict
    storage.rootStandalone Instance本地存储的根目录。通常情况下,当home目录空间不足,手工把数据文件移动到其他位置时,可以修改该字段String~/.starwhale
    versionconfig.yaml的版本,目前仅支持2.0String2.0

    Standalone Instance 的文件存储结构

    ${storage.root} 目录中存储了 Standalone Instance 所有的用户数据,包括 Project、Runtime、Model、Dataset、Evaluation 等用户直接感知的数据,也包括 ObjectStore、DataStore 等 Starwhale 后台实现的存储。具体说明如下:

    +-- ${storage.root}
    | +-- .objectstore --> 存储数据集chunk文件的简单存储,使用blake2b hash算法
    | | +-- blake2b --> hash算法名称
    | | | +-- 00 --> hash2位前缀
    | | | | +-- 0019ad58... --> object文件,文件名是文件内容的hash值
    | | | +-- 05
    | +-- .datastore --> 基于pyarrow的列式存储
    | | +-- project
    | | | +-- self --> 按照project名称进行分类存储
    | | | | +-- dataset --> 数据集相关的datastore存储,一般用来存储数据集的索引信息
    | | | | +-- eval --> 模型评测结果存储
    | +-- .recover --> 软删除某个project的存储目录,可以用 `swcli project recover` 进行恢复
    | +-- .tmp --> Dataset/Model/Runtime 构建过程中临时目录
    | +-- myproject --> 用户创建的project,所有myproject信息都存储在该目录
    | +-- self --> Standalone Instance自动创建的project
    | | +-- dataset --> swds数据集存储目录
    | | +-- evaluation --> 模型评测配置文件、日志等存储目录
    | | +-- model --> swmp模型包存储目录
    | | +-- runtime --> swrt环境包存储目录
    | | +-- workdir --> 解压、复原包文件的目录
    | | | +-- model --> swmp解压后的目录
    | | | +-- runtime --> swrt解压后的目录,若进行runtime restore操作,生成的venv或conda隔离环境,也会存放在该目录中

    有时候您可能需要用到 starwhale.Link 来存储一些信息。理论上,Link里面的URI可以是任意的合法 URI(星鲸目前只支持S3协议族和HTTP),比如s3://10.131.0.1:9000/users/path。然而,有些 Link是需要鉴权才能访问的。 link_auths 就是用来存放这些鉴权信息的。

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    link_auths 里面的每一条都会自动匹配您的URI。 目前 S3 类型的鉴权信息通过 bucket 和 endpoint 来匹配 URI。

    - + \ No newline at end of file diff --git a/zh/0.5.12/swcli/index.html b/zh/0.5.12/swcli/index.html index 954d418ef..6b91d6b08 100644 --- a/zh/0.5.12/swcli/index.html +++ b/zh/0.5.12/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale Client (swcli) 用户指南

    swcli 是一个命令行工具,可让您与 Starwhale 实例进行交互。您可以使用 swcli 完成 Starwhale 中几乎所有的任务。swcli 是用纯 Python3 编写的(需要 Python 3.7 ~ 3.11),因此可以通过 pip 命令轻松安装。目前,swcli 仅支持 Linux 和 macOS,Windows版本即将推出。

    - + \ No newline at end of file diff --git a/zh/0.5.12/swcli/installation/index.html b/zh/0.5.12/swcli/installation/index.html index aa41daee5..d8391b9cd 100644 --- a/zh/0.5.12/swcli/installation/index.html +++ b/zh/0.5.12/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    安装指南

    swcli 命令行工具能够对各种实例完成几乎所有的操作,由于是由纯 Python3 编写,可以使用 pip 命令完成安装,本文会提供一些安装建议,帮助您获得一个干净的、无依赖冲突的 swcli Python 环境。

    安装建议

    非常不建议将 Starwhale 安装在系统的全局 Python 环境中,可能会导致 Python 的依赖冲突问题。使用 venv 或 conda 创建一个隔离的 Python 环境,并在其中安装 Starwhale,是 Python 推荐的做法。

    先决条件

    • Python3.7 ~ 3.11
    • Linux 或 macOS
    • Conda(可选)

    在Ubuntu系统中,可以运行以下命令:

    sudo apt-get install python3 python3-venv python3-pip

    #如果您想安装多个python版本
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli 可以在 macOS 下工作,包括 arm(M1 Chip) 和 x86(Intel Chip) 两种体系结构。但 macOS 下自带的 Python3 可能会遇到一些 Python 自身的问题,推荐使用 homebrew 进行安装:

    brew install python3

    安装 swcli

    使用venv安装

    venv 环境既可以使用 Python3 自带的 venv,也可以使用 virtualenv 工具。

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    使用conda安装

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    👏 现在,您可以在全局环境中使用 swcli 了。

    swcli 的特定场景依赖安装

    # 针对Audio处理, 主要包含soundfile库等
    python -m pip install starwhale[audio]

    # 针对Image处理,主要包含pillow库等
    python -m pip install starwhale[pillow]

    # 针对swcli model server命令
    python -m pip install starwhale[server]

    # 针对内建的Online Serving
    python -m pip install starwhale[online-serve]

    # 安装全部依赖
    python -m pip install starwhale[all]

    更新 swcli

    #适用于venv环境
    python3 -m pip install --upgrade starwhale

    #适用于conda环境
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    卸载swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/zh/0.5.12/swcli/swignore/index.html b/zh/0.5.12/swcli/swignore/index.html index a4495a2ca..dacbad0a3 100644 --- a/zh/0.5.12/swcli/swignore/index.html +++ b/zh/0.5.12/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    关于 .swignore 文件

    .swignore 文件与 .gitignore, .dockerignore 等文件类似,都是用来定义忽略某些文件或文件夹。.swignore 文件主要应用在 Starwhale 的模型构建过程中。默认情况下,swcli model build 命令 或 starwhale.model.build() Python SDK会遍历指定目录下的所有文件,并自动排除一些已知的、不适合放入模型包中的文件或目录。

    文件格式

    • swignore文件中的每一行指定一个匹配文件和目录的模式。
    • 空行不匹配任何文件,因此它可以作为可读性的分隔符。
    • 星号*匹配除斜杠以外的任何内容。
    • #开头的行作为注释。
    • 支持wildcard的表达,类似 *.jpg, *.png

    默认下自动排除的文件或目录

    如果不想排除这些文件,可以构建模型 (swcli model build 命令) 的时候增加 --add-all 参数。

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv安装目录
    • conda安装目录

    例子

    这是MNIST示例中使用的.swignore文件:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/zh/0.5.12/swcli/uri/index.html b/zh/0.5.12/swcli/uri/index.html index 741e0c415..35ba4494f 100644 --- a/zh/0.5.12/swcli/uri/index.html +++ b/zh/0.5.12/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    版本:0.5.12

    Starwhale 资源URI

    提示

    资源 URI 在 Starwhale Client 中被广泛使用。URI 可以引用本地实例中的资源或远程实例中的任何其他资源。 这样 Starwhale Client 就可以轻松操作任何资源。

    concepts-org.jpg

    实例URI

    实例 URI 可以是以下形式之一:

    • local: 指本地的 Standalone 实例.
    • [http(s)://]<hostname or ip>[:<port>]:指向一个 Starwhale Cloud 实例。
    • [cloud://]<cloud alias>:Server或Cloud的实例别名,可以在实例登录阶段配置。
    警告

    “local”不同于“localhost”,前者为 Standalone 实例,而后者是一个 URL ,指向本地运行的 Starwhale Server 实例。

    例子:

    # 登录Starwhale Cloud,别名为swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.cn --alias swcloud

    # 将模型从本地实例复制到云实例
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # 将运行时复制到Starwhale Server实例:http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    项目URI

    项目URI的格式为“[<实例URI>/project/]<project name>”。 如果未指定实例 URI,则使用当前实例。

    例子:

    swcli project select self   # 选择当前实例中的self项目
    swcli project info local/project/self # 查看本地实例中的self项目信息

    模型/数据集/运行时URI

    • 模型URI: [<项目URI>/model/]<model name>[/version/<version id|tag>].
    • 数据集URI: [<项目URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • 运行时URI: [<项目URI>/runtime/]<runtime name>[/version/<version id|tag>].
    提示
    • swcli 支持更加人性化的短版本ID。您可以只键入版本ID的前几个字符,前提是它至少有四个字符长且唯一指向某个版本ID。但是,recover 命令必须使用完整的版本ID。
    • 如果未指定项目URI,将使用默认项目
    • 您始终可以使用版本标签而不是版本ID。

    例子:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # 检查模型信息,模型名称:mnist,版本:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # 使用短版本ID
    swcli model info mnist # 检查mnist模型信息
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # 使用latest的默认tag

    作业URI

    • 格式: [<项目URI>/job/]<job id>.
    • 如果未指定项目URI,将使用默认项目。

    例子:

    swcli job info mezdayjzge3w   # 查看默认实例和默认项目中的mezdayjzge3w版本
    swcli job info local/project/self/job/mezday # 检查本地实例,self项目,作业id:mezday

    默认实例

    当项目URI中的实例部分被省略时,将使用默认实例进行替代。默认实例是由 swcli instance login 或 swcli instance use 指定的。

    默认项目

    当模型/数据集/运行时/评估URI的项目部分被省略时,将使用默认项目。默认项目是指通过 swcli project use 命令选择的项目。

    - + \ No newline at end of file diff --git a/zh/0.6.0/cloud/billing/bills/index.html b/zh/0.6.0/cloud/billing/bills/index.html index 1cdd4668c..220de8f58 100644 --- a/zh/0.6.0/cloud/billing/bills/index.html +++ b/zh/0.6.0/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    账单明细

    账单明细查看

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击“全部账单”,可跳转查看全部账单明细。

    image

    image

    账单明细字段说明

    • 账单编号:账单的唯一标识
    • 资源:用户所使用的各类资源
    • 资源明细:使用资源运行的作业
    • 消费时间:账单开始时间至账单结束时间
    • 计费项:用户所用的产品或服务所含的具体的计费项目
    • 单价:产品或服务的单价
    • 单价单位:产品或服务单价的单位
    • 用量:产品或服务的使用量
    • 用量单位:产品或服务使用量的单位
    • 状态:账单的支付状态,分为:未结清、已结清、未结算
    - + \ No newline at end of file diff --git a/zh/0.6.0/cloud/billing/index.html b/zh/0.6.0/cloud/billing/index.html index 19b39bd2b..cf36bdd31 100644 --- a/zh/0.6.0/cloud/billing/index.html +++ b/zh/0.6.0/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    产品计费概述

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    计费说明

    计费项

    Starwhale 根据您选购的资源规格(CPU、GPU、内存)及使用时长进行计费。

    计费方式

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    按量付费主要按照资源计费周期计费,在每个结算周期生成账单并从账户中扣除相应费用。创建 Job 时,需要确定计算资源配置。

    请确保您在 Job 运行期间可用余额充足,如果在 Job 运行过程中,您的账户余额不足,会导致 Job 无法完成并按照已运行时长收费。

    开通要求

    按照按量付费创建 Job 前,您的 Starwhale 账户可用余额不得小于一个计费周期。

    说明:账户可用余额=充值金额+代金券金额-已消费金额-已退款金额-已冻结金额

    计费周期

    每5分钟为一个计费周期,不足5分钟则按5分钟计算,按照 Job 运行时长结算。

    计费时长

    从 Job 开始运行时计费,到 Job 运行结束后结束计费

    账单明细

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击“全部账单”,可跳转查看全部账单明细。详细操作流程请参见账单明细

    欠费说明

    如果账号内存在欠费账单,您无法继续使用计算资源。尽快充值结清欠费账单后可继续使用。

    查看欠费金额

    1 登录账户中心

    2 在账户概览可查看欠费金额

    退款说明

    现金余额支持退款

    需要登录账户中心,点击账户管理>充值订单,可退款的充值订单会在操作列显示退款按钮。点击可发起退款申请,详细操作流程请参见申请退款

    - + \ No newline at end of file diff --git a/zh/0.6.0/cloud/billing/recharge/index.html b/zh/0.6.0/cloud/billing/recharge/index.html index e7dfd19e7..c94109fce 100644 --- a/zh/0.6.0/cloud/billing/recharge/index.html +++ b/zh/0.6.0/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    充值和退款

    充值渠道

    Starwhale目前支持通过微信渠道进行充值。

    充值操作步骤

    操作路径:

    1 登录账户中心,点击“去充值”,可跳转至充值页面。

    image

    2 选择或者输入充值金额,充值金额需要大于50元,同时注意支付渠道的限额(超过限额会无法支付成功)。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    充值订单

    查看充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,可在账户概览页面查看最近充值订单,点击“全部订单”,可跳转查看全部充值订单。

    image

    image

    继续支付充值订单

    如您在充值页面,点击“开始充值”后,因某些原因没有支付,可在30分钟内继续支付。

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要继续支付的订单,点击“继续支付”。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    取消充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要取消的订单,点击“取消”,弹出确认弹窗后,点击“”,可取消充值订单。

    image

    - + \ No newline at end of file diff --git a/zh/0.6.0/cloud/billing/refund/index.html b/zh/0.6.0/cloud/billing/refund/index.html index 01491552f..e25214b62 100644 --- a/zh/0.6.0/cloud/billing/refund/index.html +++ b/zh/0.6.0/cloud/billing/refund/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.6.0

    账户退款

    申请退款

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要退款的订单,点击“退款”,填写退款原因,确认退款金额,可申请退款。

    提示:退订款项将原路退回,即通过微信支付的订单会退回到支付使用的微信。

    image

    image

    image

    image

    - + \ No newline at end of file diff --git a/zh/0.6.0/cloud/billing/voucher/index.html b/zh/0.6.0/cloud/billing/voucher/index.html index c3f5703e1..d5921d960 100644 --- a/zh/0.6.0/cloud/billing/voucher/index.html +++ b/zh/0.6.0/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    代金券

    什么是代金券

    代金券是starwhale以虚拟券形式给予客户的资金类权益,可用于抵扣运行时所使用资源的费用。

    如何查看我的代金券?

    登录Starwhale,进入“账户中心>代金券” 可查看代金券的编号,面值,余额,状态等信息。

    image

    点击右侧操作列“使用明细”打开“代金券使用明细页”,查看该代金券的交易时间、编号、支出等抵扣详细记录。

    image

    如何使用代金券?

    代金券适用于抵扣消费,如果您的Starwhale账户内有代金券,系统会优先抵扣代金券金额,代金券余额为0后会抵扣充值余额。

    - + \ No newline at end of file diff --git a/zh/0.6.0/cloud/index.html b/zh/0.6.0/cloud/index.html index 70069d606..9e76139a3 100644 --- a/zh/0.6.0/cloud/index.html +++ b/zh/0.6.0/cloud/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale Cloud 用户指南

    Starwhale Cloud 是托管在公有云上的服务,由 Starwhale 团队负责运维,访问地址是 https://cloud.starwhale.cn

    - + \ No newline at end of file diff --git a/zh/0.6.0/community/contribute/index.html b/zh/0.6.0/community/contribute/index.html index 3452d0c11..344323eaa 100644 --- a/zh/0.6.0/community/contribute/index.html +++ b/zh/0.6.0/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 开源贡献指南

    参与贡献

    Starwhale 非常欢迎来自开源社区的贡献,包括但不限于以下方式:

    • 描述使用过程中的遇到的问题
    • 提交Feature Request
    • 参与Slack和Github Issues讨论
    • 参与Code Review
    • 改进文档和示例程序
    • 修复程序Bug
    • 增加Test Case
    • 改进代码的可读性
    • 开发新的Features
    • 编写Enhancement Proposal

    可以通过以下方式参与开发者社区,获取最新信息和联系Starwhale开发者:

    Starwhale社区使用Github Issues来跟踪问题和管理新特性的开发。可以选择"good first issue"或"help wanted"标签的issue,作为参与开发Starwhale的起点。

    Starwhale资源列表

    代码基本结构

    核心目录组织及功能说明如下:

    • client:swcli和Python SDK的实现,使用Python3编写,对应Starwhale Standalone Instance的所有功能。
      • api:Python SDK的接口定义和实现。
      • cli:Command Line Interface的入口点。
      • base:Python 端的一些基础抽象。
      • core:Starwhale 核心概念的实现,包括Dataset、Model、Runtime、Project、Job、Evaluation等。
      • utils:Python 端的一些工具函数。
    • console:前端的实现,使用React + TypeScript编写,对应Starwhale Cloud Instance的Web UI。
    • server:Starwhale Controller的实现,使用Java编写,对应Starwhale Cloud Instance的后端API。
    • docker:Helm Charts,绝大多数Docker Image的Dockerfile等。
    • docs:Starwhale官方文档。
    • example:示例程序,包含MNIST等例子。
    • scripts:一些Bash和Python脚本,用来进行E2E测试和软件发布等。

    Fork&Clone Starwhale仓库

    您需要fork Starwhale仓库代码并clone到本机,

    搭建针对Standalone Instance的本地开发环境

    Standalone Instance采用Python编写,当要修改Python SDK和swcli时,需要进行相应的环境搭建。

    Standalone本地开发环境前置条件

    • OS:Linux或macOS
    • Python:3.7~3.11
    • Docker:>=19.03 (非必须,当调试dockerize、生成docker image或采用docker为载体运行模型任务时需要)
    • Python隔离环境:Python venv 或 virtualenv 或 conda等都可以,用来构建一个隔离的Python环境

    从源码进行安装

    基于上一步clone到本地的仓库目录:starwhale,并进入到client子目录:

    cd starwhale/client

    使用Conda创建一个Starwhale开发环境,或者使用venv/virtualenv等创建:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    安装Client包及依赖到starwhale-dev环境中:

    make install-sw
    make install-dev-req

    输入swcli --version命令,观察是否安装成功,开发环境的swcli版本是 0.0.0.dev0

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ swcli --version
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    本地修改代码

    现在可以对Starwhale代码进行修改,不需要重复安装(make install-sw命令)就能在当前starwhale-dev环境中测试cli或sdk。Starwhale Repo中设置了 .editorconfig 文件,大部分IDE或代码编辑器会自动支持该文件的导入,采用统一的缩进设置。

    执行代码检查和测试

    starwhale 目录中操作,会执行单元测试、client的e2e测试、mypy检查、flake8检查和isort检查等。

    make client-all-check

    搭建针对Cloud Instance的本地开发环境

    Cloud Instance的后端采用Java编写,前端采用React+TypeScript编写,可以按需搭建相应的开发环境。

    搭建前端Console开发环境

    搭建后端Server开发环境

    • 开发语言:Java
    • 项目构建工具:Maven
    • 开发框架:Spring Boot+Mybatis
    • 测试框架:Junit5(其中mock框架为mockito,断言部分使用hamcrest,数据库、web服务等模拟使用Testcontainers)
    • 代码检查:使用maven插件 maven-checkstyle-plugin

    Server开发环境前置条件

    • OS:Linux、macOS或Windows
    • JDK: >=11
    • Docker:>=19.03
    • Maven:>=3.8.1
    • Mysql:>=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(如果没有k8s集群,可以使用Minikube作为开发调试时的备选方案)

    修改代码并增加单测

    现在可以进入到相应模块,对server端的代码进行修改、调整。其中业务功能代码位置为src/main/java,单元测试目录为src/test/java。

    执行代码检查和单元测试

    cd starwhale/server
    mvn clean package

    本地部署服务

    • 前置服务

      • Minikube(可选,无k8s集群时可使用此服务,安装方式可见:Minikube

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • 打包server程序

      若部署server端时,需要把前端同时部署上,可先执行前端部分的构建命令,然后执行'mvn clean package',则会自动将已编译好的前端文件打包进来。

      使用如下命令对程序进行打包:

      cd starwhale/server
      mvn clean package
    • 指定server启动所需的环境变量

      # Minio相关配置
      export SW_STORAGE_ENDPOINT=http://${Minio IP,默认为127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,默认为starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,默认为starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,默认为starwhale}
      export SW_STORAGE_REGION=${Minio region,默认为local}
      # kubernetes配置
      export KUBECONFIG=${.kube配置文件所在路径}\.kube\config

      export SW_INSTANCE_URI=http://${Server服务所在机器IP}:8082
      # Mysql相关配置
      export SW_METADATA_STORAGE_IP=${Mysql IP,默认为127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,默认为3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,默认为starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,默认为starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,默认为starwhale}
    • 部署server服务

      使用IDE或如下方式部署均可。

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • 功能调试

      这里有两种方式对修改的功能进行调试:

      • 使用swagger-ui进行接口调试,访问 /swagger-ui/index.html找到对应的api即可。
      • 或直接在ui访问,进行相应功能的调试(前提是打包时已经按说明将前端代码进行了提前构建)
    - + \ No newline at end of file diff --git a/zh/0.6.0/concepts/index.html b/zh/0.6.0/concepts/index.html index 5cf30413f..75d169c5f 100644 --- a/zh/0.6.0/concepts/index.html +++ b/zh/0.6.0/concepts/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/0.6.0/concepts/names/index.html b/zh/0.6.0/concepts/names/index.html index 4de09f15a..59b77574e 100644 --- a/zh/0.6.0/concepts/names/index.html +++ b/zh/0.6.0/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale中的命名规则

    下文的命名是指对Starwhale中的项目、模型、数据集、运行时以及版本标签进行命名。

    名称限制

    • 名称不区分大小写。
    • 名称必须仅由大小写字母“A-Z a-z”、数字“0-9”、连字符“-”、点“.”和下划线“_”组成。
    • 名称应始终以字母或“_”字符开头。
    • 名称的最大长度为80。

    名称唯一性要求

    • 资源名称在其所影响范围内必须是唯一的。例如,项目名称在实例中必须是唯一的,模型名称在其所在项目中必须是唯一的。
    • 同一个项目下同类资源必须使用不同的名称,包括那些已删除的资源。 例如,项目“Apple”不能有两个名为“Alice”的模型,即使其中一个已经被删除。
    • 不同种类的资源可以有相同的名称。 例如,一个项目、一个模型和一个数据集可以同时被命名为“Alice”。
    • 不同项目的资源可以具有相同的名称。 例如,“Apple”项目中的模型和“Banana”项目中的模型可以具有相同的名称“Alice”。
    • 已经被垃圾回收的资源名称可以重复使用。 例如,将项目“Apple”中名称为“Alice”的模型移除并进行垃圾回收后,该项目可以有一个新的同名模型“Alice”。
    - + \ No newline at end of file diff --git a/zh/0.6.0/concepts/project/index.html b/zh/0.6.0/concepts/project/index.html index ac90e872d..d38bbd2a2 100644 --- a/zh/0.6.0/concepts/project/index.html +++ b/zh/0.6.0/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale中的项目

    “项目”是组织不同资源(如模型、数据集等)的基本单位。您可以将项目用于不同的目的。例如,您可以为数据科学家团队、产品线或特定模型创建项目。用户通常在日常工作中会参与一个或多个项目。

    Starwhale Server/Cloud 项目按账号分组。Starwhale Standalone 没有帐号概念。所以您不会在 Starwhale Standalone 项目中看到任何帐号前缀。Starwhale Server/Cloud项目可以是“公共”或“私有”。公共项目意味着同一实例上的所有用户在默认情况下都自动成为该项目的“访客”角色。有关角色的更多信息,请参阅Starwhale中的角色和权限

    Starwhale Standalone会自动创建一个“self”项目并将其配置为默认项目。

    - + \ No newline at end of file diff --git a/zh/0.6.0/concepts/roles-permissions/index.html b/zh/0.6.0/concepts/roles-permissions/index.html index cd4a2f469..40b2a8af0 100644 --- a/zh/0.6.0/concepts/roles-permissions/index.html +++ b/zh/0.6.0/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale中的角色和权限

    角色用于为用户分配权限。只有Starwhale Server/Cloud有角色和权限,Starwhale Standalone没有相应概念。系统会自动创建一个管理员角色并分配给默认用户“starwhale”。一些敏感操作只能由具有管理员角色的用户执行,例如在Starwhale Server中创建新的账号。

    每个项目具有三类角色:

    • 管理员Admin - 项目管理员可以读写项目数据并将项目角色分配给用户。
    • 维护者Maintainer - 项目维护者可以读写项目数据。
    • 访客Guest - 项目访客只能读取项目数据。
    动作管理员Admin维护者Maintainer访客Guest
    管理项目成员
    编辑项目
    查看项目
    创建评价
    删除评价
    查看评价
    创建数据集
    更新数据集
    删除数据集
    查看数据集
    创建模型
    更新模型
    删除模型
    查看模型
    创建运行时
    更新运行时
    删除运行时
    查看运行时

    创建项目的用户成为第一个项目管理员。他可以在这之后将角色分配给其他用户。

    - + \ No newline at end of file diff --git a/zh/0.6.0/concepts/versioning/index.html b/zh/0.6.0/concepts/versioning/index.html index d0e72e883..5be3520dc 100644 --- a/zh/0.6.0/concepts/versioning/index.html +++ b/zh/0.6.0/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale中的资源版本控制

    • Starwhale管理所有模型、数据集和运行时的历史记录。对特定资源的每次更新都会附加一个新版本的历史记录。
    • 版本由version id标识。version id是由 Starwhale自动生成的随机字符串,并按其创建时间排序。
    • 版本可以有标签。Starwhale使用版本标签来提供人性化的版本表示。默认情况下,Starwhale会为每个版本附加一个默认标签。默认标记是字母“v”后跟一个数字。对于每个版本化的资源,第一个版本标签始终标记为“v0”,第二个版本标记为“v1”,依此类推。有一个特殊的标签“latest”总是指向最新的版本。删除版本后,将不会重复使用其默认标签。例如,有一个带有标签“v0、v1、v2”的模型。 删除“v2”后,标签将为“v0、v1”。 接下来一个标签将是“v3”而不是“v2”。您可以将自己定义的标签附加到任何版本并随时删除它们。
    • Starwhale使用线性历史,不提供分支。
    • Starwhale资源无法真正回滚。当要恢复某个历史版本时,Starwhale会复制该版本数据并将其作为新版本追加到历史记录的末尾。您可以手动删除和恢复历史版本。
    - + \ No newline at end of file diff --git a/zh/0.6.0/dataset/index.html b/zh/0.6.0/dataset/index.html index b1d1b93ae..092f3955f 100644 --- a/zh/0.6.0/dataset/index.html +++ b/zh/0.6.0/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 数据集

    overview

    设计概述

    Starwhale Dataset 定位

    Starwhale Dataset 包含数据构建、数据加载和数据可视化三个核心阶段,是一款面向ML/DL领域的数据管理工具。Starwhale Dataset 能直接使用 Starwhale Runtime 构建的环境,能被 Starwhale ModelStarwhale Evaluation 无缝集成,是 Starwhale MLOps 工具链的重要组成部分。

    根据 Machine Learning Operations (MLOps): Overview, Definition, and Architecture 对MLOps Roles的分类,Starwhale Dataset的三个阶段针对用户群体如下:

    • 数据构建:Data Engineer、Data Scientist
    • 数据加载:Data Scientist、ML Developer
    • 数据可视化:Data Engineer、Data Scientist、ML Developer

    mlops-users

    核心功能

    • 高效加载:数据集原始文件存储在 OSS 或 NAS 等外部存储上,使用时按需加载,不需要数据落盘。
    • 简单构建:既支持从 Image/Video/Audio 目录、json文件和 Huggingface 数据集等来源一键构建数据集,又支持编写 Python 代码构建完全自定义的数据集。
    • 版本管理:可以进行版本追踪、数据追加等操作,并通过内部抽象的 ObjectStore,避免数据重复存储。
    • 数据集分发:通过 swcli dataset copy 命令,实现 Standalone 实例和 Cloud/Server 实例的双向数据集分享。
    • 数据可视化:Cloud/Server 实例的 Web 界面中可以对数据集提供多维度、多类型的数据呈现。
    • 制品存储:Standalone 实例能存储本地构建或分发的 swds 系列文件,Cloud/Server 实例使用对象存储提供集中式的 swds 制品存储。
    • Starwhale无缝集成Starwhale Dataset 能使用 Starwhale Runtime 构建的运行环境构建数据集。Starwhale EvaluationStarwhale Model 直接通过 --dataset 参数指定数据集,就能完成自动数据加载,便于进行推理、模型评测等任务。

    关键元素

    • swds 虚拟包文件:swdsswmpswrt 不一样,不是一个打包的单一文件,而是一个虚拟的概念,具体指的是一个目录,是 Starwhale 数据集某个版本包含的数据集相关的文件,包括 _manifest.yaml, dataset.yaml, 数据集构建的Python脚本和数据文件的链接等。可以通过 swcli dataset info 命令查看swds所在目录。swds 是Starwhale Dataset 的简写。

    swds-tree.png

    • swcli dataset 命令行:一组dataset相关的命令,包括构建、分发和管理等功能,具体说明参考CLI Reference
    • dataset.yaml 配置文件:描述数据集的构建过程,可以完全省略,通过 swcli dataset build 参数指定,可以认为 dataset.yaml 是build命令行参数的一种配置文件表示方式。swcli dataset build 参数优先级高于 dataset.yaml
    • Dataset Python SDK:包括数据构建、数据加载和若干预定义的数据类型,具体说明参考Python SDK
    • 数据集构建的 Python 脚本:使用 Starwhale Python SDK 编写的用来构建数据集的一系列脚本。

    最佳实践

    Starwhale Dataset 的构建是独立进行的,如果编写构建脚本时需要引入第三方库,那么使用 Starwhale Runtime 可以简化 Python 的依赖管理,能保证数据集的构建可复现。Starwhale 平台会尽可能多的内建开源数据集,让用户 copy 下来数据集后能立即使用。

    命令行分组

    Starwhale Dataset 命令行从使用阶段的角度上,可以划分如下:

    • 构建阶段
      • swcli dataset build
    • 可视化阶段
      • swcli dataset diff
      • swcli dataset head
    • 分发阶段
      • swcli dataset copy
    • 基本管理
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    目前 Cloud/Server 实例中 Web UI 可以对数据集进行可视化展示,只有使用 Python SDK 的DataType 才能被前端正确的解释,映射关系如下:

    • Image:展示缩略图、放大图、MASK类型图片,支持 image/pngimage/jpegimage/webpimage/svg+xmlimage/gifimage/apngimage/avif 格式。
    • Audio:展示为音频wave图,可播放,支持 audio/mp3audio/wav 格式。
    • Video:展示为视频,可播放,支持 video/mp4video/avivideo/webm 格式。
    • GrayscaleImage:展示灰度图,支持 x/grayscale 格式。
    • Text:展示文本,支持 text/plain 格式,可设置编码格式,默认为utf-8。
    • Binary和Bytes:暂不支持展示。
    • Link:上述几种多媒体类型都支持指定link作为存储路径。

    Starwhale Dataset 数据格式

    数据集由多个行组成,每行称为一个样本,每个样本包含若干 features ,features 是一个类 dict 结构,对key和value有一些简单的限制[L]

    • dict的key必须为str类型。
    • dict的value必须是 int/float/bool/str/bytes/dict/list/tuple 等 Python 的基本类型,或者 Starwhale 内置的数据类型
    • 不同样本的数据相同key的value,不需要保持同一类型。
    • 如果value是list或者tuple,其元素的数据类型必须一致。
    • value为dict时,其限制等同于限制[L]

    例子:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    文件类数据的处理方式

    Starwhale Dataset 对文件类型的数据进行了特殊处理,如果您不关心 Starwhale 的实现方式,可以忽略本小节。

    根据实际使用场景,Starwhale Dataset 对基类为 starwhale.BaseArtifact 的文件类数据有两种处理方式:

    • swds-bin: Starwhale 以自己的二进制格式 (swds-bin) 将数据合并成若干个大文件,能高效的进行索引、切片和加载。
    • remote-link: 满足用户的原始数据存放在某些外部存储上,比如 OSS 或 NAS 等,原始数据较多,不方便搬迁或者已经用一些内部的数据集实现进行封装过,那么只需要在数据中使用 link,就能建立索引。

    在同一个Starwhale 数据集中,可以同时包含两种类型的数据。

    - + \ No newline at end of file diff --git a/zh/0.6.0/dataset/yaml/index.html b/zh/0.6.0/dataset/yaml/index.html index f0ffb3c3b..b8b3572fe 100644 --- a/zh/0.6.0/dataset/yaml/index.html +++ b/zh/0.6.0/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    dataset.yaml 使用指南

    提示

    dataset.yaml 对于 swcli dataset build 构建数据集的过程是非必要的。

    Starwhale Dataset 构建的时候使用 dataset.yaml,若省略 dataset.yaml,则可以在 swcli dataset build 命令行参数中描述相关配置,可以认为 dataset.yamlbuild 命令行的配置文件化表述。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Dataset的名字String
    handler为一个函数,返回一个Generator或一个可迭代的对象或一个实现 __iter__ 方法的类,格式为 {module 路径}:{类名|函数名}String
    desc数据集描述信息String""
    versiondataset.yaml格式版本,目前仅支持填写 1.0String1.0
    attr数据集构建参数Dict
    attr.volume_sizeswds-bin格式的数据集每个data文件的大小。当写数字时,单位bytes;也可以是数字+单位格式,如64M, 1GB等Int或Str64MB
    attr.alignment_sizeswds-bin格式的数据集每个数据块的数据alignment大小,如果设置alignment_size为4k,数据块大小为7.9K,则会补齐0.1K的空数据,让数据块为alignment_size的整数倍,提升page size等读取效率Integer或String128

    使用示例

    最简示例

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    helloworld的数据集,使用dataset.yaml目录中dataset.py文件中的 ExampleProcessExecutor 类进行数据构建。

    MNIST数据集构建示例

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor

    desc: MNIST data and label test dataset

    attr:
    alignment_size: 1k
    volume_size: 4M

    handler为generator function的例子

    dataset.yaml 内容:

    name: helloworld
    handler: dataset:iter_item

    dataset.py 内容:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}

    本例中,handler为一个generator function,Starwhale SDK根据首个yield出来的元素为非Starwhale.Link类型,等同于继承 starwhale.SWDSBinBuildExecutor 类。

    - + \ No newline at end of file diff --git a/zh/0.6.0/evaluation/heterogeneous/node-able/index.html b/zh/0.6.0/evaluation/heterogeneous/node-able/index.html index 979607f23..b4091ffe0 100644 --- a/zh/0.6.0/evaluation/heterogeneous/node-able/index.html +++ b/zh/0.6.0/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -24,7 +24,7 @@ 参考 链接

    v0.13.0-rc.1 为例

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    注意: 此操作会在所有的 K8s 节点中运行 NVIDIA 的 device plugin 插件, 如果之前配置过, 则会被更新, 请谨慎评估使用的镜像版本

  • 确认 GPU 可以在集群中发现和使用 参考下边命令, 查看 Jetson 节点的 Capacity 中有 nvidia.com/gpu, GPU 即被 K8s 集群正常识别

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • 制作和使用自定义镜像

    文章前面提到的 l4t-jetpack 镜像可以满足我们一般的使用, 如果我们需要自己定制更加精简或者更多功能的镜像, 可以基于 l4t-base 来制作 相关 Dockerfile 可以参考 Starwhale为mnist制作的镜像

    - + \ No newline at end of file diff --git a/zh/0.6.0/evaluation/heterogeneous/virtual-node/index.html b/zh/0.6.0/evaluation/heterogeneous/virtual-node/index.html index ab8ad0a04..92fa71e4c 100644 --- a/zh/0.6.0/evaluation/heterogeneous/virtual-node/index.html +++ b/zh/0.6.0/evaluation/heterogeneous/virtual-node/index.html @@ -10,7 +10,7 @@ - + @@ -19,7 +19,7 @@ 此方案被各云厂商广泛用于 serverless 容器集群方案, 比如阿里云的 ASK, Amazon 的 AWS Fargate 等.

    原理

    virtual kubelet 框架将 kubelet 对于 Node 的相关接口进行实现, 只需要简单的配置即可模拟一个节点. 我们只需要实现 PodLifecycleHandler 接口即可支持:

    • 创建, 更新, 删除 Pod
    • 获取 Pod 状态
    • 获取 Container 日志

    将设备加入集群

    如果我们的设备由于资源限制等情况无法作为 K8s 的一个节点进行服务, 那么我们可以通过使用 virtual kubelet 模拟一个代理节点的方式对这些设备进行管理, Starwhale Controller 和设备的控制流如下


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    virtual kubelet 将 Starwhale Controller 下发下来的 Pod 编排信息转化为对设备的控制行为, 比如 ssh 到设备上执行一段命令, 或者通过 USB 或者串口发送一段消息等.

    下面是使用 virtual kubelet 的方式来对一个未加入集群的可以 ssh 的设备进行控制的示例

    1. 准备证书
    • 创建文件 csr.conf, 内容如下
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name
    [req_distinguished_name]
    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names
    [alt_names]
    IP = 1.2.3.4
    • 生成证书
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • 提交证书
    cat vklet.csr| base64 | tr -d "\n" # 输出内容作为 csr.yaml 文件中 spec.request 的内容

    csr.yaml

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************************************************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
     kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    现在我们得到了 vklet-cert.pem

    • 编译 virtual kubelet
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    创建节点的配置文件 mock.json

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    启动 virtual kubelet

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig

    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    至此, 我们使用 virtual kubelet 模拟了一个 100 core + 100G 内存的节点.

    • 增加 PodLifecycleHandler 的实现, 将 Pod 编排中的重要信息转化为 ssh 命令执行, 并且收集日志待 Starwhale Controller 收集

    具体实现可参考 ssh executor

    - + \ No newline at end of file diff --git a/zh/0.6.0/evaluation/index.html b/zh/0.6.0/evaluation/index.html index 300f88405..40d87c1fc 100644 --- a/zh/0.6.0/evaluation/index.html +++ b/zh/0.6.0/evaluation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 模型评测

    设计概述

    Starwhale Evaluation 定位

    Starwhale Evaluation 目标是对模型评测进行全流程管理,包括创建 Job、分发 Task、查看模型评测报告和基本管理等。Starwhale Evaluation 是 Starwhale构建的 MLOps 工具链使用 Starwhale Model、Starwhale Dataset、Starwhale Runtime 三个基础元素,在模型评测这个场景上的具体应用,后续还会包含 Starwhale Model Serving、Starwhale Training 等应用场景。

    核心功能

    • 可视化展示swcli和 Web UI都提供对模型评测结果的可视化展示,支持多个结果的对比等功能,同时用户可以自定义记录评测中间过程。
    • 多场景适配:不管是在笔记本的单机环境,还是在分布式服务器集群环境,都能使用统一的命令、Python脚本、制品和操作方法进行模型评测,满足不同算力、不同数据量的外部环境要求。
    • Starwhale无缝集成:使用Starwhale Runtime提供的运行环境,将 Starwhale Dataset 作为数据输入,在 Starwhale Model 中运行模型评测任务,不管是在 swcli、Python SDK 还是 Cloud/Server 实例 Web UI中,都能简单的进行配置。

    关键元素

    • swcli model run 命令行: 能够完成模型的批量、离线式评测。
    • swcli model serve 命令行: 能够完成模型的在线评测。

    最佳实践

    命令行分组

    从完成 Starwhale Evaluation 全流程任务的角度,可以将所涉及的命令分组如下:

    • 基础准备阶段
      • swcli dataset build 或 Starwhale Dataset Python SDK
      • swcli model build 或 Starwhale Model Python SDK
      • swcli runtime build
    • 评测阶段
      • swcli model run
      • swcli model serve
    • 结果展示阶段
      • swcli job info
    • 基本管理
      • swcli job list
      • swcli job remove
      • swcli job recover

    job-step-task 抽象

    • job: 一次模型评测任务就是一个 job,一个 job 包含一个或多个 step
    • step: step 对应评测过程中的某个阶段。使用PipelineHandler的默认评测过程,step就是predictevaluate;用户自定义的评测过程,step 就是使用 @handler, @evaluation.predict, @evaluation.evaluate 修饰的函数。step 之间可以有依赖关系,形成一个DAG。一个 step 包含一个或多个 task。同一 step 中的不同 task,执行逻辑是一致的,只是输入参数不同,常见做法是将数据集分割成若干部分,然后传入每个task 中,task 可以并行执行。
    • task: task 是最终运行的实体。在 Cloud/Server 实例中,一个 task 就是一个Pod的container; 在Standalone 实例中,一个 task 就是一个 Python Thread。

    job-step-task 的抽象是实现 Starwhale Evaluation 分布式运行的基础。

    - + \ No newline at end of file diff --git a/zh/0.6.0/faq/index.html b/zh/0.6.0/faq/index.html index fbee60814..a0c799e8d 100644 --- a/zh/0.6.0/faq/index.html +++ b/zh/0.6.0/faq/index.html @@ -10,13 +10,13 @@ - +
    - + \ No newline at end of file diff --git a/zh/0.6.0/getting-started/cloud/index.html b/zh/0.6.0/getting-started/cloud/index.html index 00a9982e7..73d137123 100644 --- a/zh/0.6.0/getting-started/cloud/index.html +++ b/zh/0.6.0/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale Cloud入门指南

    Starwhale Cloud运行在阿里云上,域名是 https://cloud.starwhale.cn ,后续我们会推出部署在AWS上的 https://cloud.starwhale.ai 服务,需要注意的是,这是两个相互独立的实例,帐户和数据不共享。您可以选择任何一个开始。

    在开始之前,您需要先安装Starwhale Client(swcli)

    注册Starwhale Cloud并创建您的第一个项目

    您可以直接使用自己的GitHub或微信帐号登录,也可以注册一个新的帐号。如果您使用 GitHub 或 微信帐号登录,系统会要求您提供用户名。

    然后您可以创建一个新项目。在本教程中,我们将使用名称 demo 作为项目名称。

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    登录云实例

    swcli instance login --username <您的用户名> --password <您的密码> --alias swcloud https://cloud.starwhale.cn

    将数据集、模型和运行时复制到Starwhale Cloud

    swcli model copy mnist swcloud/project/demo
    swcli dataset copy mnist swcloud/project/demo
    swcli runtime copy pytorch swcloud/project/demo

    使用 Web UI 运行评估

    console-create-job.gif

    恭喜! 您已完成Starwhale Cloud的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.6.0/getting-started/index.html b/zh/0.6.0/getting-started/index.html index 239f99c29..022711f9d 100644 --- a/zh/0.6.0/getting-started/index.html +++ b/zh/0.6.0/getting-started/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    入门指南

    首先,您需要安装Starwhale Client(swcli),可以运行如下命令:

    python3 -m pip install starwhale

    更多详细信息请参阅swcli安装指南

    根据您使用的实例类型,您可以参考以下三个入门指南:

    • Starwhale Standalone入门指南 - 本指南可帮助您在台式PC/笔记本电脑上运行一个MNIST评估。这是开始使用Starwhale最快最简单的方法。
    • Starwhale Server入门指南 - 本指南可帮助您在私有服务器上安装Starwhale Server并运行一个MNIST评估。在本指南结束时,您将拥有一个Starwhale Server实例,您可以在其中管理您的数据集和模型。
    • Starwhale Cloud入门指南 - 本指南可帮助您在Starwhale Cloud上创建帐户并运行MNIST评估。这是体验所有Starwhale功能的最简单方法。
    - + \ No newline at end of file diff --git a/zh/0.6.0/getting-started/runtime/index.html b/zh/0.6.0/getting-started/runtime/index.html index 237ac26d5..80920725f 100644 --- a/zh/0.6.0/getting-started/runtime/index.html +++ b/zh/0.6.0/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale Runtime入门指南

    本文演示如何搭建Pytorch环境的Starwhale Runtime以及如何在不同环境中使用它。该runtime可以满足Starwhale中六个例子的依赖需求:mnist、speech commands、nmt、cifar10、ag_news、PennFudan。相关代码链接:example/runtime/pytorch

    您可以从本教程中学到以下内容:

    • 如何构建Starwhale Runtime。
    • 如何在不同场景下使用Starwhale Runtime。
    • 如何发布Starwhale Runtime。

    前置条件

    基础环境

    运行以下命令以克隆示例代码:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch-cn-mirror #非中国大陆网络可使用pytorch例子

    构建Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    在Standalone Instance中使用Starwhale Runtime

    在shell中使用Starwhale Runtime

    # 激活runtime
    swcli runtime activate pytorch-cn-mirror

    swcli runtime activate会下载runtime的所有python依赖,并在当前shell环境中激活该环境。这个过程可能需要很长时间。

    当runtime被激活时,所有依赖项都已在您的python环境中准备就绪,类似于virtualenv的source venv/bin/activate或者conda的conda activate命令。如果您关闭了shell或切换到另一个shell,则下次使用之前需要重新激活这个runtime。

    在swcli中使用Starwhale Runtime

    # 模型构建中使用runtime
    swcli model build . --runtime pytorch-cn-mirror
    # 数据集构建中使用runtime
    swcli dataset build . --runtime pytorch-cn-mirror
    # 模型评测中使用runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch-cn-mirror

    将 Starwhale Runtime 复制到另一个实例

    您可以将运行时复制到Server/Cloud实例,然后可以在Server/Cloud实例中使用或由其他用户下载。

    # 将runtime复制到名为“pre-k8s”的Server实例
    ❯ swcli runtime copy pytorch-cn-mirror cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/zh/0.6.0/getting-started/server/index.html b/zh/0.6.0/getting-started/server/index.html index 68324f955..47e7b705f 100644 --- a/zh/0.6.0/getting-started/server/index.html +++ b/zh/0.6.0/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale Server入门指南

    安装Starwhale Server

    安装 Starwhale Server,参见安装指南

    创建您的第一个项目

    登录服务器

    打开浏览器并在地址栏中输入服务器的 URL。 使用默认用户名(starwhale)和密码(abcd1234)登录。

    console-artifacts.gif

    创建一个新项目

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    将数据集、模型和运行时复制到Starwhale Server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    使用Web UI运行模型评估

    使用浏览器打开“demo”项目并创建一个新的评估。

    console-create-job.gif

    恭喜! 您已完成Starwhale Server的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.6.0/getting-started/standalone/index.html b/zh/0.6.0/getting-started/standalone/index.html index 3c5ca3871..cb7aeedea 100644 --- a/zh/0.6.0/getting-started/standalone/index.html +++ b/zh/0.6.0/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale Standalone入门指南

    Starwhale Client(swcli)安装完成后,您就可以使用Starwhale Standalone。

    我们也提供对应的Jupyter Notebook例子,可以在 Google Colab 或本地的 vscode/jupyterlab 中试用。

    下载例子

    通过以下方式克隆Starwhale项目来下载Starwhale示例:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    为了节省例子的下载时间,我们执行git clone命令时,忽略了git-lfs,并只保留最近一次的commit信息。我们选用ML/DL领域的HelloWorld程序-MNIST来介绍如何从零开始构建数据集、模型包和运行环境,并最终完成模型评测。接下来的操作都在 starwhale 目录中进行。

    核心工作流程

    构建 Pytorch 运行时

    运行时示例代码位于example/runtime/pytorch目录中。

    • 构建Starwhale运行时包:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      提示

      当首次构建Starwhale Runtime时,由于需要创建venv或conda隔离环境,并下载相关的Python依赖,命令执行需要花费一段时间。时间长短取决与所在机器的网络情况和runtime.yaml中Python依赖的数量。建议合理设置机器的 ~/.pip/pip.conf 文件,填写缓存路径和适合当前网络环境的pypi mirror地址。

      处于中国大陆网络环境中的用户,可以参考如下配置:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • 检查您本地的Starwhale运行时:

      swcli runtime list
      swcli runtime info pytorch

    构建模型

    模型示例代码位于 example/mnist 目录中。

    • 下载预训练模型文件:

      cd example/mnist
      CN=1 make download-model
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建一个Starwhale模型:

      swcli model build example/mnist --runtime pytorch
    • 检查您本地的Starwhale模型:

      swcli model list
      swcli model info mnist

    构建数据集

    数据集示例代码位于 example/mnist 目录中。

    • 下载MNIST原始数据:

      cd example/mnist
      CN=1 make download-data
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建Starwhale数据集:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • 检查您本地的Starwhale数据集:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    运行评估作业

    • 创建评估工作

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • 检查评估结果

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    恭喜! 您已完成Starwhale Standalone的入门指南。

    - + \ No newline at end of file diff --git a/zh/0.6.0/index.html b/zh/0.6.0/index.html index 095577c28..f3e95087f 100644 --- a/zh/0.6.0/index.html +++ b/zh/0.6.0/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    什么是Starwhale

    概述

    Starwhale是一个 MLOps/LLMOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    Starwhale能够帮助您:

    • 跟踪您的训练/测试数据集历史记录,包括所有数据项及其相关标签,以便您轻松访问它们。
    • 管理您可以在团队中共享的模型包。
    • 在不同的环境中运行您的模型,无论是在 Nvidia GPU服务器上还是在嵌入式设备(如 Cherry Pi)上。
    • 为您的模型快速创建配备交互式 Web UI的在线服务。

    同时,Starwhale 是一个开放的平台,您可以创建插件来满足自己的需求。

    部署选项

    Starwhale的每个部署称为一个实例。所有实例都可以通过Starwhale Client(swcli)进行管理。

    您可以任选以下实例类型之一开始使用:

    • Starwhale Standalone - Starwhale Standalone 本质上是一套存储在本地文件系统中的数据库。它由 Starwhale Client(swcli)创建和管理。您只需安装 swcli 即可使用。目前,一台机器上的每个用户只能拥有一个Starwhale Standalone 实例。我们建议您使用 Starwhale Standalone 来构建和测试您的数据集和模型,然后再将它们推送到 Starwhale Server/Cloud 实例。
    • Starwhale Server - Starwhale Server 是部署在您本地服务器上的服务。除了 Starwhale Client(swcli)的文本交互界面,Starwhale Server还提供 Web UI供您管理数据集和模型,以及在Kubernetes集群中运行模型并查看运行结果。
    • Starwhale Cloud - Starwhale Cloud 是托管在公共云上的服务。 通过在https://cloud.starwhale.cn注册一个账号,您就可以使用Starwhale,而无需安装、运行和维护您自己的实例。 Starwhale Cloud 还提供公共资源供您下载,例如一些流行的开源数据集、模型和运行时。查看 Starwhale Cloud 实例上的 “starwhale/public”项目以获取更多详细信息。

    在您决定要使用的实例类型时,请考虑以下因素:

    实例类型部署位置维护者用户界面可扩展性
    Starwhale Standalone您的笔记本电脑或本地服务器不需要命令行不可扩展
    Starwhale Server您的数据中心您自己Web UI和命令行可扩展,取决于您的 Kubernetes 集群
    Starwhale Cloud公共云,如AWS或阿里云Starwhale团队Web UI和命令行可扩展,但目前受到云上免费可用资源的限制
    - + \ No newline at end of file diff --git a/zh/0.6.0/model/index.html b/zh/0.6.0/model/index.html index 4b9388d47..1ce473d0d 100644 --- a/zh/0.6.0/model/index.html +++ b/zh/0.6.0/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 模型

    overview

    Starwhale 模型是一种机器学习模型的标准包格式,可用于多种用途,例如模型微调、模型评估和在线服务。 Starwhale 模型包含模型文件、推理代码、配置文件等等。

    创建一个 Starwhale 模型

    创建 Starwhale 模型有两种方法:通过 swcli 或通过 SDK

    使用 swcli 创建 Starwhale 模型

    使用 swcli 创建 Starwhale 模型之前,您可以定义一个model.yaml,其中描述了关于Starwhale模型的一些必要信息,然后运行以下命令:

    swcli model build . --model-yaml /path/to/model.yaml

    有关该命令和 model.yaml 的更多信息,请参阅swcli参考。需要注意的是,model.yaml 是非必要的。

    使用 Python SDK 创建 Starwhale 模型

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    管理 Starwhale 模型

    使用 swcli 管理 Starwhale 模型

    命令说明
    swcli model list列出项目中所有Starwhale模型
    swcli model info显示有关Starwhale模型的详细信息
    swcli model copy将Starwhale模型复制到另一个位置
    swcli model remove删除Starwhale模型
    swcli model recover恢复之前删除的Starwhale模型

    使用 Web 界面管理 Starwhale 模型

    管理 Starwhale 模型的历史版本

    Starwhale 模型是版本化的。关于版本的基本信息可以参考 Starwhale中的资源版本控制

    使用 swcli 管理 Starwhale 模型的历史版本

    命令说明
    swcli model history列出Starwhale模型的所有版本
    swcli model info显示某个Starwhale模型版本的详细信息
    swcli model diff比较两个版本的Starwhale模型
    swcli model copy复制某个Starwhale模型版本到新的版本
    swcli model remove删除某个Starwhale模型版本
    swcli model recover恢复以前删除的Starwhale模型版本

    模型评估

    使用swcli进行模型评估

    命令说明
    swcli model run指定某个Starwhale模型进行模型评估

    存储格式

    Starwhale模型是一个打包了原始目录的tar文件。

    - + \ No newline at end of file diff --git a/zh/0.6.0/model/yaml/index.html b/zh/0.6.0/model/yaml/index.html index 5c388f21f..ca2725726 100644 --- a/zh/0.6.0/model/yaml/index.html +++ b/zh/0.6.0/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    model.yaml 使用指南

    提示

    model.yaml 对于 swcli model build 构建模型的过程是非必要的。

    Starwhale Model 构建时,若使用 swcli model build 命令,可以通过 --model-yaml 参数指定符合特定格式的yaml文件,简化模型构建的参数指定。

    即使不指定 --model-yaml 参数,swcli model build 也会自动寻找 ${workdir} 目录下的 model.yaml 文件,会提取其中的参数。swcli model build 命令行中指定参数优先级大于 model.yaml 中的等价配置,可以认为 model.yamlbuild 命令行的配置文件化表述。

    当使用 Python SDK 方式构建 Starwhale 模型时,model.yaml 文件不生效。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Model 的名字,等价于 --name 参数。String
    run.modules模型构建时搜索的Python Modules,可以指定多个模型运行的入口点,格式为 Python 可 Imported 路径。等价于 --module 参数。List[String]
    run.handlerrun.modules的曾用写法,只能指定一个模型运行的入口点,已废弃String
    versionmodel.yaml格式版本,目前仅支持填写 1.0String1.0
    desc模型描述信息,等价于 --desc 参数。String

    使用示例

    name: helloworld
    run:
    modules:
    - src.evaluator
    desc: "example yaml"

    名称为 helloworld 的 Starwhale 模型,搜索 swcli model build {WORKDIR} 命令中 ${WORKDIR} 目录相对的 src/evaluator.py 文件中被 @evaluation.predict, @evaluation.evaluate@handler 修饰的函数, 或继承自 PipelineHandler 的类,这些函数或类会被加入 Starwhale 模型可运行的入口点列表中,在 swcli model run 或 Web UI 运行时,选择对应的入口点(handler)运行模型。

    model.yaml 是非必要的,yaml 中定义参数可以在 swcli 命令行参数中指定。

    swcli model build . --model-yaml model.yaml

    等价于:

    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/sdk/dataset/index.html b/zh/0.6.0/reference/sdk/dataset/index.html index cd5c99918..d0039890a 100644 --- a/zh/0.6.0/reference/sdk/dataset/index.html +++ b/zh/0.6.0/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 数据集 SDK

    dataset

    获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    参数

    • uri: (str 或 Resource, required)
      • Dataset URI 格式的字符串或 Resource 对象。
    • create: (str, optional)
      • 数据集创建模式,包括 auto, emptyforbid 三种方式。
        • auto 模式: 如果数据集已经存在,不会自动创建数据集;如果数据集不存在,则自动创建数据集。
        • empty 模式: 如果数据集已经存在,则抛出异常;如果数据集不存在,则自动创建数据集。
        • forbid 模式: 如果数据集已经存在,则不做任何事情;如果数据集不存在,则抛出异常。forbid 模式能确保数据集存在。
      • auto 模式是默认值。
    • readonly: (bool, optional)
      • 对于已经存在的数据集,可以指定 readonly=True 保证数据集以只读方式加载。
      • 默认值为 False

    使用示例

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # return False, "mnist" dataset is not existing.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset 实现 Starwhale 数据集的抽象,能够对Standalone/Server/Cloud 实例上的数据集进行操作。

    from_huggingface

    from_huggingface 是一个 classmethod 方法,能够将 Huggingface 上的数据集转化为 Starwhale 数据集。

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称。
    • repo: (str, required)
    • subset: (str, optional)
      • Huggingface的数据集 subset 名称,如果HF数据集有多个 subsets, 您务必要指定一个 subset。
    • split: (str, optional)
      • Huggingface的数据集中 Split 名称。如果没有指定 split,则数据集中所有的 splits 数据都会被构建。
    • revision: (str, optional)
      • Huggingface的数据集版本,默认是 main,即main分支的最新一次提交。参数接受branch, tag 或 commit hash。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • cache: (bool, optional)
      • 是否使用 Huggingface 的本地缓存。
      • 默认使用缓存。
      • 缓存 = 下载文件缓存 + 本地Huggingface 数据集缓存。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json 是一个 classmethod 方法,能够将 json 字符串转化为 Starwhale 数据集。

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称
    • json_text: (str, required)
      • json 字符串,from_json 函数会序列化该字符串为 Python 对象,然后开始构建 Starwhale 数据集。
    • field_selector: (str, optional)
      • 可以提取 json_text 中特定的 array 结构。
      • 默认从 json 的根提取数据。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder 是一个 classmethod 方法,能够读取指定目录中的 Image/Video/Audio 数据,并将其自动转化为 Starwhale 数据集。该函数支持如下特性:

    • 能够递归的搜索目标目录及子目录
    • 支持三种类型的文件提取:
      • image: 支持 png/jpg/jpeg/webp/svg/apng 图片类型。图片文件会被转化为 Starwhale.Image 类型。
      • video: 支持 mp4/webm/avi 视频类型。视频文件会被转化为 Starwhale.Video 类型。
      • audio: 支持 mp3/wav 音频类型。音频文件会被转化为 Starwhale.Audio 类型。
    • 每个文件对应数据集的一条记录,文件对应的数据集字段名称为 file
    • auto_label=True,则会使用父目录的名称作为该条数据的标签,对应 label 字段。根目录下的文件,则不会被打标签。
    • 若存在与 image/video/audio 同名的 txt 文件,则该文件内容会被作为 caption 字段内容存放到数据集中。
    • 若根目录存在 metadata.csvmetadata.jsonl 文件,则会自动读取文件的内容,并将其通过文件路径名作为关联,存入数据集中,可以用来指定 meta 信息。
      • metadata.csvmetadata.jsonl 文件是互斥的,当都存在的时候,程序会抛出异常。
      • metadata.csvmetadata.jsonl 每行记录中需要包含 file_name 字段,指向对应文件的路径。
      • metadata.csvmetadata.jsonl 对于数据集构建是可选的。
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • folder: (str|Path, required)
      • 文件夹路径
    • kind: (str|DatasetFolderSourceType, required)
      • 数据类型设置,目前支持 image, videoaudio 三种类型。
      • 会根据设置的 kind 值,在 folder 中递归寻找对应类型的文件。其他类型文件会被忽略掉。
    • name: (str|Resource, optional)
      • 数据集名称。
      • 若不指定,则使用目录名称作为数据集名称。
    • auto_label: (bool, optional)
      • 是否根据父目录的名字自动对每条记录打标签。
      • 默认为 True
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例 ${folder-example}

    • 函数调用示例

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • caption 示例

      folder/dog/1.png
      folder/dog/1.txt

      1.txt 中的内容,会填充到 1.png 所在行中 caption 字段中。

    • metadata.csv 和 metadata.jsonl 示例

      metadata.csv 内容:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl 内容:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • 自动 label 示例

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

      生成的数据集中包含四条数据,分为 dog 和 cat 两类。

    __iter__

    __iter__ 是一个 method 方法,能够对数据集进行迭代。

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label 和 img 是 mnist数据集中的数据列
    print(item.features.img)

    batch_iter

    batch_iter 是一个 method 方法,能够批量地进行数据集迭代。

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    参数

    • batch_size: (int, optional)
      • batch的大小,默认值为1。
    • drop_not_full: (bool, optional)
      • 最后一组batch数据数量小于 batch_size 时,该组数据是否会被抛弃掉。
      • 默认是不抛弃。

    使用示例

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ 是一个 method 方法,能提供数据集中某些行数据的获取,操作方式类似 Python 的 dict 和 list 类型。

    from starwhale import dataset

    ds = dataset("mock-int-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-str-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ 是一个 method 方法,能提供数据集中行数据的更新,操作方式类似 Python 的 dict 类型。__setitem__ 支持多线程并行插入数据。

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    参数

    • key: (int|str, required)
      • key 即为数据集中每行的 index,类型为 int 或 str,一个数据集中只接受一种类型。
    • value: (DataRow|tuple|dict, required)
      • value 即为数据集中每行的 features,一般建议用 Python 的 dict 类型。

    使用示例

    • 插入数据

    向 test 数据集中插入两条数据,index 分别为 test 和 test2。

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • 并行插入数据
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ 是一个 method 方法,用来删除数据集中的某些行数据。

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append 是一个 method 方法,用来向数据集中添加数据,类似 Python list 的 append 函数。

    • 添加 features dict,每行数据自动 index 为 int 类型,从0开始自增。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • 添加 index + features dict,数据集中每行数据的 index 不会被自动处理。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend 是一个 method 方法,用来向数据集中批量添加数据,类似 Python list 的 extend 函数。

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit 是一个 method 方法,调用 commit 时会将当前缓存中数据 flush 到存储中,并产生一个数据集版本,后续可以用这个版本信息加载相应的数据集内容。

    对于一个数据集,如果添加一些数据后,并没有调用 commit 方法,而是直接调用 close 或退出进程,那么这些数据依旧会写入到数据集中,只是没有生成一个新的版本。

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    参数

    • tags: (List(str), optional)
      • 指定 tags,可以指定多个tag。
    • message: (str, optional)
      • 提交信息,默认为空。
    • force_add_tags: (bool, optional)
      • 当给该版本添加标签时,对于 server/cloud 实例,若标签已经被应用到其他数据集版本时,可以使用 force_add_tags=True 参数强制将标签添加到此版本上,否则会抛出异常。
      • 默认为 False
    • ignore_add_tags_errors: (bool, optional)
      • 忽略添加标签时抛出的异常。
      • 默认为 False

    使用示例

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly 是一个 property 属性,表示数据集是否只读,返回值为 bool 类型。

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version 是一个 property 属性,字符串类型。

    • 当加载一个已经存在的数据集时,返回的是数据集加载的对应版本。
    • 当加载一个不存在的数据集时,返回的是 pending_commit_version 。

    pending_commit_version

    pending_commit_version 是一个 property 属性,字符串类型,表示待提交的版本信息。当调用 commit 方法后,pending_commit_version 会变成 committed_version

    committed_version

    committed_version 是一个 property 属性,字符串类型,表示已经调用 commit 方法后生成的版本信息。当没有调用 commit 方法时,访问该属性时程序会抛出异常。

    remove

    remove 是一个 method 方法,等价于 swcli dataset remove 命令,能够删除数据集。

    def remove(self, force: bool = False) -> None:

    recover

    recover 是一个 method 方法,等价于 swcli dataset recover 命令,能够对软删除且未GC的数据集进行恢复。

    def recover(self, force: bool = False) -> None:

    summary

    summary 是一个 method 方法,等价于 swcli dataset summary 命令,返回数据集摘要信息。

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history 是一个 method 方法,等价于 swcli dataset history 命令,返回数据集的历史记录。

    def history(self) -> t.List[t.Dict]:

    flush

    flush 是一个 method 方法,能够将内存中暂存的数据刷到持久化存储中。commit 和 close 方法会自动调用 flush 。

    close

    close 是一个 method 方法,关闭已经打开的数据集相关链接。Dataset 也实现了 contextmanager,使用 with 语法后可以自动关闭数据集,不需要主动调用 close 方法。

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head 是一个 method 方法,能够显示数据集前n行数据,等价于 swcli dataset head 命令。

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> t.List[DataRow]:

    fetch_one

    fetch_one 是一个 method 方法,获得数据集的第一条记录,相当于 head(n=1)[0]

    list

    list 是一个 classmethod 方法,能够列出项目 URI 下的 Starwhale 数据集,等价于 swcli dataset list 命令。

    @classmethod
    def list(
    cls,
    project_uri: t.Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[DatasetListType, Dict[str, Any]]:

    copy

    copy 是一个 method 方法,能够复制数据到其他实例上,等价于 swcli dataset copy 命令。

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    参数

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • 从远端复制到本地 Standalone 实例时,可以指定对应的项目 URI。
    • force: (bool, optional)
      • 当目标实例上已经有相同版本的数据集时,是否强制覆盖。
      • 默认不覆盖。
      • 当复制标签到远端 Server/Cloud 实例时,若标签已经被其他版本使用,使用 force=True 参数可以强制变更标签到本版本上。
    • mode: (str, optional)
      • 数据集复制模式,分为 patch 模式 和 overwrite 模式,默认为 patch
      • patch: 使用补丁方式更新数据集,只更新计划变更的行和列,在新生成的版本中仍能读取到未受影响的行和列。
      • overwrite: 使用覆盖方式更新数据集,会将原来的所有行都删除,然后再进行更新,在新生成的版本中读取不到老数据。但请放心,删除的数据依旧可以通过旧版本进行访问。
    • ignore_tags (List[str], optional)
      • 复制数据集时,可以忽略的自定义标签。
      • 默认会复制所有用户自定义标签到其他实例中。
      • 复制标签会忽略 latest 和 ^v\d+$ 内建标签。

    使用示例

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch 是一个 method 方法,能够将 Starwhale 数据集转化为 Pytorch 的 torch.utils.data.Dataset 类型,可以进一步传给 torch.utils.data.DataLoader 进行使用。

    需要注意的是,to_pytorch 函数返回的是 Pytorch 的 IterableDataset

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    参数

    • transform: (callable, optional)
      • 支持用户自定义变换函数,能够按需转化数据类型。
    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。
    • skip_default_transform: (bool, optional)
      • 如果没有设置 transform, 默认状态下会使用 Starwhale 内建的 transform 函数,对数据进行转化,可以通过 skip_default_transform 参数禁用内建数据转化。

    使用示例

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow 是一个 method 方法,能够将 Starwhale 数据集转化为 Tensorflow 的 tensorflow.data.Dataset 类型。

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    参数

    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。

    使用示例

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config 是一个 method 方法,用来设置 Starwhale 数据集中 blob 的相关属性信息。需要在变更数据之前调用。

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    参数

    • volume_size: (int|str, optional)
      • 单个数据集 blob 文件的大小。
      • 默认值为 64MB。
      • 当类型为 int 时,单位为 Bytes。
      • 当类型为 str 时,格式类似 1GB, 64MB 。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的大小
      • 默认值为 128个字节。
      • 与 volume_size 一样的类型解析。

    使用示例

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config 是一个 method 方法,用来设置 Starwhale 数据集 loader 的过程参数。

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    参数

    • num_workers: (int, optional)
      • 加载数据集的 worker 数目,默认为2。
    • cache_size: (int, optional)
      • 预加载的数据的数量,默认为20条。
    • field_transformer: (dict, optional)
      • features 字段名称的变换。

    使用示例

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/sdk/evaluation/index.html b/zh/0.6.0/reference/sdk/evaluation/index.html index dad3543cb..1844537f6 100644 --- a/zh/0.6.0/reference/sdk/evaluation/index.html +++ b/zh/0.6.0/reference/sdk/evaluation/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.6.0

    Starwhale 模型评测 SDK

    @evaluation.predict

    @evaluation.predict 是一个修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取本地或远端的数据集,将数据集中的数据以单条或批量的方式,传递给 evaluation.predict 修饰的函数。
    • 通过多副本的设置,实现分布式数据集消费的功能,能以水平扩展的方式缩短模型评测任务的用时。
    • 自动将函数返回值和数据集的输入 features 存储到 results 表中,方便Web UI展示和进一步的 evaluate 阶段使用。
    • 每单条或每批量组数据会调用一次被修饰的函数,完成推理过程。

    控制参数

    • resources: (dict, optional)
      • 定义 predict 每个任务在 Server 实例上运行时所需要的资源,包括 memory、cpu 和 nvidia.com/gpu 三种类型。
        • memory: 单位为 Bytes,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"memory": {"request": 100 * 1024, "limit": 200 * 1024}}
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"memory": 100 * 1024} 等价于 resources={"memory": {"request": 100 * 1024, "limit": 100 * 1024}}
        • cpu: 单位为 CPU 核心数,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"cpu": {"request": 1, "limit": 2}}
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"cpu": 1.5} 等价于 resources={"cpu": {"request": 1.5, "limit": 1.5}}
        • nvidia.com/gpu: 单位为 GPU显卡数,支持 int 类型。
          • nvidia.com/gpu 不支持设置 request 和 limit,仅支持单个数字。
      • 需要注意: resource 参数目前仅在 Server 实例中生效。Cloud 实例,通过在提交评测任务时,选择对应的资源池达到相同的作用。Standalone 实例完全不支持该特性。
    • replicas: (int, optional)
      • predict 运行的副本数。
      • predict 相当于定义了一个 Step, 在该 Step 中有若干等价的 Task,每个 Task 在 Cloud/Server 实例上运行实体是 Pod,在 Standalone 实例上运行实体是 Thread。
      • 当指定多个副本时,这些副本是等价的,它们会共同消费选定的数据集,实现分布式数据集消费的目的,可以理解为某个数据集中的某行数据,只会被一个 predict 副本读取。
      • 默认值为1。
    • batch_size: (int, optional)
      • 批量将数据集中的数据传递进函数中。
      • 默认值为1。
    • fail_on_error: (bool, optional)
      • 当被修饰的函数抛出异常时,是否中断所有模型评测。如果预期某些“异常”数据会导致评测失败,但不想中断整体评测,可以设置 fail_on_error=False
      • 默认为 True
    • auto_log: (bool, optional)
      • 是否自动记录函数返回值和数据集输入 features 到 results 表中。
      • 默认为 True
    • log_mode: (str, optional)
      • auto_log=True 时,可以通过设置 log_mode 参数,定义以 plainpickle 方式记录函数返回值。
      • 默认为 pickle 方式。
    • log_dataset_features: (List[str], optional)
      • auto_log=True 时,可以通过该参数,选择性的记录数据集中的某些 features 。
      • 默认会记录所有的 features 。
    • needs: (List[Callable], optional)
      • 定义该任务运行的前置条件,可以用 needs 语法实现 DAG。
      • needs 接受被 @evaluation.predict, @evaluation.evaluate 和 @handler 修饰的函数。
      • 默认为空,不依赖任何其他任务。

    传入参数

    被修饰的函数,需要定义一些输入参数,用来接受数据集内容等,包含如下模式:

    • 单个 data 参数:

      • data 为 一个类 dict 类型,能够读取到数据集的 features 内容。
      • batch_size=1 或不设置 batch_size 时,可以通过 data['label']data.label 方式读取 label feature。
      • 当设置 batch_size > 1 时,data 为一个 list。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external 参数方式:

      • data 为数据集的features。
      • external 为一个 dict 类型,包含 index, index_with_dataset, dataset_info, context 和 dataset_uri 这些内建属性,可以用来做更细粒度的处理。
        • index: 数据集对应行的 index 信息。
        • index_with_dataset: 适用于多个数据集输入的时候做 index 区分。
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo 对象。
        • context: starwhale.Context 对象。
        • dataset_uri: starwhale.base.uri.resource.Resource 对象。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw 方式:

      • data 为数据集的features。
      • kw 可以读取到 external
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs 方式:

      • args的第一个元素为 data
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs 方式:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args 方式:

      • 此方式无法读取到 external 信息。
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    使用示例

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "memory": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate 是一个修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取 predict 阶段记录到 results 表的数据,并以迭代器的方式传入函数中。
    • evaluate 阶段只会运行一个副本,无法像 predict 阶段一样定义 replicas 参数。

    参数

    • resources: (dict, optional)
      • 与 @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • 与 @evaluation.predict 中的 needs 参数定义保持一致。
      • 绝大多数场景中,会依赖一个 @evaluation.predict 修饰的函数。
    • use_predict_auto_log: (bool, optional)
      • 默认为 True,传入一个能够遍历 predict 结果的迭代器到函数中。

    输入参数

    • use_predict_auto_log=True(默认)时,传入一个能够遍历 predict 结果的迭代器到函数中。
      • 迭代出来的对象为一个字典,包含 output 和 input 两个 key。
        • outputpredict 阶段函数返回的元素。
        • input 为推理时对应使用的数据集的 features ,为一个字典类型。
    • use_predict_auto_log=False 时,不传入任何参数到函数中。

    使用示例

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    class Evaluation

    starwhale.Evaluation 实现 Starwhale Model Evaluation 的抽象,能对Standalone/Server/Cloud实例上的Model Evaluation进行log和scan等操作,用来记录和检索指标。

    __init__

    __init__ 函数用来初始化一个 Evaluation 对象。

    class Evaluation:
    def __init__(self, id: str, project: Project | str) -> None:

    参数

    • id: (str, required)
      • Evaluation 的 UUID,此ID由 Starwhale 系统自动生成。
    • project: (Project|str, required)
      • Project 对象或 Project URI 字符串。

    使用示例

    from starwhale import Evaluation

    standalone_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="self")
    server_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="cloud://server/project/starwhale:starwhale")
    cloud_e = Evaluation("2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/project/starwhale:llm-leaderboard")

    from_context

    from_context 是一个 classmethod 方法,获得当前 Context 下的 Evaluation 对象。from_context 在任务运行环境下才能生效,非任务运行环境调用该方法,会抛出 RuntimeError 异常,提示 Starwhale Context 没有被合理设置。

    @classmethod
    def from_context(cls) -> Evaluation:

    使用示例

    from starwhale import Evaluation

    with Evaluation.from_context() as e:
    e.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})

    log

    log 是一个 method 方法,记录某些评测指标到特定表中,之后可以通过 Server/Cloud 实例的 Web 页面或 scan 方法中查看相关的表。

    def log(
    self, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    参数

    • category: (str, required)
      • 记录的类别,该值会被作为 Starwhale Datastore 的表名的后缀。
      • 一个 category 会对应一张 Starwhale Datastore 的表,这些表会以评测任务ID作为隔离区分,相互不影响。
    • id: (str|int, required)
      • 记录的ID,表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • 字典类型,key-value 方式记录指标。
      • key 为 str 类型。
      • value 既支持 int, float, str, bytes, bool 等常量类型,也支持 tuple, list, dict 等复合类型。同时也支持Artifacts类型 Starwhale.Image, Starwhale.Video, Starwhale.Audio, Starwhale.Text, Starwhale.Binary 的记录。
      • 当 value 中包含 dict 类型时,Starwhale SDK会自动展平字典,便于更好的进行可视化展示和指标对比。
        • 比如 metrics 为 {"test": {"loss": 0.99, "prob": [0.98,0.99]}, "image": [Image, Image]} , 存入后会变成 {"test/loss": 0.99, "test/prob": [0.98, 0.99], "image/0": Image, "image/1": Image} 结构。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation.from_context()

    evaluation_store.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log("ppl", "1", {"a": "test", "b": 1})

    scan

    scan 是一个 method 方法,返回一个迭代器,用来读取某些模型评测表中的数据。

    def scan(
    self,
    category: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    参数

    • category: (str, required)
      • 与 log 函数中的 category 参数含义一致。
    • start: (Any, optional)
      • 起始 Key,若不指定,则从表的第一条数据开始。
    • end: (Any, optional)
      • 结束 Key,若不指定,则一直遍历到表的结尾。
    • keep_none: (bool, optional)
      • 若某列的值为 None,是否返回该列,默认不返回。
    • end_inclusive: (bool, optional)
      • 是否包含 end 对应的行,默认不包含。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    results = [data for data in evaluation_store.scan("label/0")]

    flush

    flush 是一个 method 方法,能够将 log 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush(self, category: str, artifacts_flush: bool = True) -> None

    参数

    • category: (str, required)
      • 与 log 函数中的 category 参数含义一致。
    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True

    log_result

    log_result 是一个 method 方法,记录评测指标到 results 表中,等价于 log 方法指定 category 参数为 results。results 表一般用来存储推理结果,@starwhale.predict 默认情况下会将修饰函数的返回值存储在 results 表中,也可以用 log_result 手动存储。

    def log_result(self, id: t.Union[str, int], metrics: t.Dict[str, t.Any]) -> None:

    参数

    • id: (str|int, required)
      • 记录的ID,results 表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • 与 log 函数中 metrics 参数定义一致。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")
    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})

    scan_results

    scan_results 是一个 method 方法,返回一个迭代器,用来读取 results 表中的数据。

    def scan_results(
    self,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    参数

    • start: (Any, optional)
      • 起始 Key,若不指定,则从表的第一条数据开始。
      • 与 scan 函数中 start 参数定义一致。
    • end: (Any, optional)
      • 结束 Key,若不指定,则一直遍历到表的结尾。
      • 与 scan 函数中 end 参数定义一致。
    • keep_none: (bool, optional)
      • 若某列的值为 None,是否返回该列,默认不返回。
      • 与 scan 函数中 keep_none 参数定义一致。
    • end_inclusive: (bool, optional)
      • 是否包含 end 对应的行,默认不包含。
      • 与 scan 函数中 end_inclusive 参数定义一致。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")

    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})
    results = [data for data in evaluation_store.scan_results()]

    flush_results

    flush_results 是一个 method 方法,能够将 log_result 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_results 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_results(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • 与 flush 方法中 artifacts_flush 参数定义一致。

    log_summary

    log_summary 是一个 method 方法,记录某些指标到 summary 表中,Server/Cloud 实例评测页面显示的就是 summary 表的数据。 每次调用,Starwhale 都会自动以此次评测的唯一ID作为表的行ID进行更新,可以在一次评测过程中多次调用该函数,用来更新不同的列。

    每个项目中有一张 summary 表,所有该项目下的评测任务都会将 summary 信息写入该表中,便于进行不同模型评测的结果对比。

    def log_summary(self, *args: t.Any, **kw: t.Any) -> None:

    参数与 log 函数一致,也会对字典类型自动展平。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")

    evaluation_store.log_summary(loss=0.99)
    evaluation_store.log_summary(loss=0.99, accuracy=0.99)
    evaluation_store.log_summary({"loss": 0.99, "accuracy": 0.99})

    get_summary

    get_summary 是一个 method 方法,用来返回 log_summary 记录的信息。

    def get_summary(self) -> t.Dict:

    flush_summary

    flush_summary 是一个 method 方法,能够将 log_summary 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_summary 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_summary(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • 与 flush 方法中 artifacts_flush 参数定义一致。

    flush_all

    flush_all 是一个 method 方法,能够将 log, log_results, log_summary 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_all 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_all(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • 与 flush 方法中 artifacts_flush 参数定义一致。

    get_tables

    get_tables 是一个 method 方法,返回模型评测中产生的所有表的名称,需要注意的是,该函数并不返回 summary 表名称。

    def get_tables(self) -> t.List[str]:

    close

    close 是一个 method 方法,用来关闭 Evaluation 对象。close 调用时会将,会自动刷新数据到存储中。同时 Evaluation 也实现了 __enter____exit__ 方法,可以用 with 语法简化 close 的手工调用。

    def close(self) -> None:

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    evaluation_store.log_summary(loss=0.99)
    evaluation_store.close()

    # auto close when the with-context exits.
    with Evaluation.from_context() as e:
    e.log_summary(loss=0.99)

    @handler

    @handler 是一个修饰器,具备如下功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 可以控制副本数。
    • 多个 Handlers 可以通过依赖关系,生成DAG,便于控制执行流程。
    • 可以对外暴露端口,以类似 Web Handler 方式运行。

    @fine_tune, @evaluation.predict 和 @evaluation.evaluate 可以认为是 @handler 在某些特定领域的应用,@handler 是这些修饰器的底层实现。@handler 更为基础和灵活。

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    参数

    • resources: (dict, optional)
      • 与 @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • 与 @evaluation.predict 中的 needs 参数定义保持一致。
    • replicas: (int, optional)
      • 与 @evaluation.predict 中的 replicas 参数定义保持一致。
    • name: (str, optional)
      • 显示 handler 时候用的名字。
      • 若不指定,则用修饰函数的名字。
    • expose: (int, optional)
      • 对外暴露的端口,当运行一个 Web Handler的时候,需要声明暴露的端口。
      • 默认为0,表示不暴露任何端口。
      • 目前只能暴露一个端口。
    • require_dataset: (bool, optional)
      • 定义此 Handler 运行时,是否需要数据集。
      • 如果 required_dataset=True,在 Server/Cloud 实例的 Web 界面创建评测任务的时候,需要让用户强制输入数据集;如果 required_dataset=False,则 Web 界面中不需要用户指定数据集。
      • 默认为 False

    使用示例

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune 是一个修饰器,定义模型训练的微调(fine-tune)过程。

    一些限制和使用建议:

    • fine_tune 只有一个副本。
    • fine_tune 需要有数据集输入。
    • 一般在 fine_tune 开始时,通过 Context.get_runtime_context() 获取数据集。
    • 一般在 fine_tune 结束时,通过 starwhale.model.build 生成微调后的 Starwhale 模型包,该模型包会被自动复制到评测对应的项目中。

    参数

    • resources: (dict, optional)
      • 与 @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • 与 @evaluation.predict 中的 needs 参数定义保持一致。

    使用示例

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    @multi_classification 修饰器使用sklearn lib对多分类问题进行结果分析,输出confusion matrix, roc, auc等值,并且会写入到 starwhale DataStore 相关表中。 使用的时候需要对所修饰的函数返回值有一定要求,返回(label, result, probability_matrix)(label, result)

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    参数

    • confusion_matrix_normalize: (str, optional)
      • 接收三种参数:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • 是否计算hamming loss。
      • 默认为 True
    • show_cohen_kappa_score: (bool, optional)
      • 是否计算 cohen kappa score。
      • 默认为 True
    • show_roc_auc: (bool, optional)
      • 是否计算roc/auc, 计算的时候,需要函数返回(label,result, probability_matrix) 三元组,否则只需返回(label, result) 两元组即可。
      • 默认为 True
    • all_labels: (List, optional)
      • 定义所有的Labels。

    使用示例


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int]]:
    label, result = [], []
    return label, result

    PipelineHandler

    PipelineHandler 是一个类,提供默认的模型评测过程定义,需要用户实现 predict 和 evaluate 函数。

    PipelineHandler 等价于 @evaluation.predict + @evaluation.evaluate,展示使用方式不一样,背后的模型评测过程一致。

    用户需要实现如下函数:

    • predict: 定义推理过程,等价于 @evaluation.predict 修饰的函数。
    • evaluate: 定义评测过程,等价于 @evaluation.evaluate 修饰的函数。
    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    参数

    • predict_batch_size: (int, optional)
      • 等价于 @evaluation.predict 中的 batch_size 参数。
      • 默认值为1。
    • ignore_error: (bool, optional)
      • 等价于 @evaluation.predict 中的 fail_on_error 参数。
      • 默认值为 False
    • predict_auto_log: (bool, optional)
      • 等价于 @evaluation.predict 中的 auto_log 参数。
      • 默认值为 True
    • predict_log_mode: (str, optional)
      • 等价于 @evaluation.predict 中的 log_mode 参数。
      • 默认值为 pickle
    • predict_log_dataset_features: (List[str], optional)
      • 等价于 @evaluation.predict 中的 log_dataset_features 参数。
      • 默认值为空,即记录所有 features。

    PipelineHandler.run 修饰符

    PipelineHandler.run 修饰符可以对 predict 和 evaluate 方法进行资源描述,支持 replicas 和 resources 的定义:

    • PipelineHandler.run 只能修饰继承自 PipelineHandler 子类中的 predict 和 evaluate 方法。
    • predict 方法可以设置 replicas 参数。evaluate 方法的 replicas 值永远为1。
    • resources 参数与 @evaluation.predict 和 @evaluation.evaluate 中的 resources 参数定义和使用方法保持一致。
    • PipelineHandler.run 修饰器是可选的。
    • PipelineHandler.run 仅在 Server 和 Cloud 实例中生效,Standalone 实例不支持资源定义。
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    使用示例

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    执行模型评测过程中传入的上下文信息,包括Project、Task ID等。Context 的内容是自动注入的,可以通过如下方式使用:

    • 继承 PipelineHandler 类内使用 self.context 对象。
    • 通过 Context.get_runtime_context() 获取。

    需要注意,只有在模型评测过程中,才能使用Context,否则程序会抛出异常。

    目前Context可以获得如下值:

    • project: str
      • Project 名字。
    • version: str
      • 模型评测的唯一ID。
    • step: str
      • Step 名字。
    • total: int
      • Step 下所有 Task 的数量。
    • index: int
      • Task 索引标号,下标从0开始。
    • dataset_uris: List[str]
      • Starwhale 数据集的URI 列表。

    使用示例


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api 是一个修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,当用户使用 swcli model serve 命令启动 Web Service 接收外部请求,并将推理结果返回给用户,实现在线评测。

    使用示例

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    如果希望自定义 web service 的实现, 可以继承 Service 并重写 serve 函数即可。

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    说明:

    • 使用 PipelineHandler.add_api 函数添加的 handler 和 api 以及实例化的 Service.api decorator 添加的 handler 可以同时生效
    • 如果使用自定义的 Service, 需要在 model 中实例化自定义的 Service 类

    自定义 Request 和 Response

    Request 和 Response 分别是用于接收用户请求和返回给用户结果的处理类, 可以简单的理解成是 handler 的前处理和后处理逻辑

    Starwhale 将支持 Dataset 内置类型的 Request 实现以及 Json Response 的实现, 同时用户可以自定义处理逻辑来使用, 自定义的示例如下:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req


    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/sdk/job/index.html b/zh/0.6.0/reference/sdk/job/index.html index a56a20e3f..1dc1df8bc 100644 --- a/zh/0.6.0/reference/sdk/job/index.html +++ b/zh/0.6.0/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 任务 SDK

    job

    通过Job URI参数获取 starwhale.Job 对象,可以获得 Standalone/Server/Cloud 实例上的任务。

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    参数

    • uri: (str, required)
      • Job URI格式的字符串。

    使用示例

    from starwhale import job
    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")
    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job 实现对 Starwhale 任务的抽象,能够对 Standalone/Server/Cloud 实例上的任务进行一些信息获取类的操作。

    list

    list 是一个 classmethod 方法,能够列出某个项目下的任务。

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> t.Tuple[t.List[Job], t.Dict]:

    参数

    • project: (str, optional)
      • Project URI,Standalone/Server/Cloud 实例上的项目都可以。
      • 若不指定 project 参数,则使用 swcli project select 命令选定的项目。
    • page_index: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定页面序号。
        • 默认值为 1。
        • 页面起始序号为 1。
      • Standalone 实例不支持翻页操作,设置该参数无效。
    • page_size: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定每页返回的任务数量。
        • 默认值为 20。
      • Standalone 实例不支持翻页操作,设置该参数无效。

    使用示例

    from starwhale import Job
    # list jobs of current selected project
    jobs, pagination_info = Job.list()
    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")
    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get 是一个 classmethod 方法,能够获得某个特定任务的信息,返回 Starwhale.Job 对象,与 starwhale.job 函数功能和参数定义上完全一致。

    使用示例

    from starwhale import Job
    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")
    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary 是一个 property 属性,返回任务运行中写入 summary 表中的数据,字典类型。

    @property
    def summary(self) -> t.Dict[str, t.Any]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.summary)

    tables

    tables 是一个 property 属性,返回任务运行中创建的表名(不包括 summary 表,因为 summary 表是项目级别自动创建的),列表类型。

    @property
    def tables(self) -> t.List[str]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.tables)

    get_table_rows

    get_table_rows 是一个 method 方法,可以根据表名等参数返回数据表的记录,迭代器类型。

    def get_table_rows(
    self,
    name: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator[t.Dict[str, t.Any]]:

    参数

    • name: (str, required)
      • datastore 表名。通过 tables 属性获得的表名,可以传给 name 参数。
    • start: (Any, optional)
      • 返回记录中,ID的起始值。
      • 默认值为 None,表示从头开始。
    • end: (Any, optional)
      • 返回记录中,ID的结束值。
      • 默认值为 None ,表示一直到表末尾。
      • 当 start 和 end 都为 None 时,则会以迭代器方式返回整个表的数据。
    • keep_none: (bool, optional)
      • 是否返回值为 None的记录。
      • 默认为 False。
    • end_inclusive: (bool, optional)
      • end 参数设置时,迭代记录的时候,是否包含end记录。
      • 默认为 False。

    使用示例

    from starwhale import job
    j = job("local/project/self/job/xm5wnup")
    table_name = j.tables[0]
    for row in j.get_table_rows(table_name):
    print(row)
    rows = list(j.get_table_rows(table_name, start=0, end=100))
    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]

    status

    status 是一个 property 属性,返回当前Job的实时状态,字符串类型,状态包含 CREATED, READY, PAUSED, RUNNING, CANCELLING, CANCELED, SUCCESS, FAIL 和 UNKNOWN。

    @property
    def status(self) -> str:

    create

    create 是一个 classmethod 方法,能够创建 Standalone 实例或 Server/Cloud 实例上的任务,包括 Model Evaluation, Fine-tuning, Online Serving 和 Developing 等类型的任务。函数返回 Job 类型的对象。

    • create 通过 project 参数决定生成的任务运行在何种实例上,包括 Standalone 和 Server/Cloud 实例。
    • 在 Standalone 实例下,create 创建一个同步执行的任务。
    • 在 Server/Cloud 实例下, create 创建一个异步执行的任务。
    @classmethod
    def create(
    cls,
    project: Project | str,
    model: Resource | str,
    run_handler: str,
    datasets: t.List[str | Resource] | None = None,
    runtime: Resource | str | None = None,
    resource_pool: str = DEFAULT_RESOURCE_POOL,
    ttl: int = 0,
    dev_mode: bool = False,
    dev_mode_password: str = "",
    dataset_head: int = 0,
    overwrite_specs: t.Dict[str, t.Any] | None = None,
    ) -> Job:

    参数

    对所有实例都生效的参数:

    • project: (Project|str, required)
      • Project 对象或 Project URI 字符串。
    • model: (Resource|str, required)
      • Model URI 字符串或 Model 类型的 Resource 对象,表示要运行的 Starwhale 模型包。
    • run_handler: (str, required)
      • Starwhale 模型包中对应的可运行的 handler 名称,比如 mnist 的 evaluate handler: mnist.evaluator:MNISTInference.evaluate
    • datasets: (List[str | Resource], optional)
      • Starwhale 模型包运行所需要的数据集,非必需。

    仅对 Standalone 实例生效的参数:

    • dataset_head: (int, optional)
      • 一般用于调试场景,只使用数据集前 N 条数据来供 Starwhale 模型来消费。

    仅对 Server/Cloud 实例生效的参数:

    • runtime: (Resource | str, optional)
      • Runtime URI 字符串或 Runtime 类型的 Resource 对象,表示要运行任务所需要的 Starwhale 运行时。
      • 当不指定该参数时,会尝试使用 Starwhale 模型包的内建运行时。
      • 创建 Standalone 实例下的任务,使用 Python 脚本所用的 Python 解释器环境作为自己的运行时,不支持通过 runtime 参数指定运行时。若有指定运行时的需要,可以使用 swcli model run 命令。
    • resource_pool: (str, optional)
      • 指定任务运行在哪个资源池中,默认为 default 资源池。
    • ttl: (int, optional)
      • 任务最大存活时间,超时后会被杀掉。
      • 参数单位为秒。
      • 默认情况下,ttl为0,表示没有超时限制,任务会按预期运行。
      • 当ttl小于0时,也表示没有超时限制。
    • dev_mode: (bool, optional)
      • 是否设置为调试模式。开启此模式后,可以通过VSCode Web进入到相关环境中。
      • 默认不进入调试模式。
    • dev_mode_password: (str, optional)
      • 调试模式下VSCode Web的登录密码。
      • 默认为空,此时会用任务的UUID作为密码,可以通过 job.info().job.uuid 获得。
    • overwrite_specs: (Dict[str, Any], optional)
      • 支持设置 handler 的 replicasresources 字段。
      • 若为空,则使用模型包中对应 handler 设置的值。
      • overwrite_specs 的 key 为 handler 的名字,比如 mnist 的 evaluate handler: mnist.evaluator:MNISTInference.evaluate
      • overwrite_specs 的 value 为设置的值,字典格式,支持设置 replicasresources , 比如 {"replicas": 1, "resources": {"memory": "1GiB"}}

    使用示例

    • 创建一个 Cloud 实例的任务
    from starwhale import Job
    project = "https://cloud.starwhale.cn/project/starwhale:public"
    job = Job.create(
    project=project,
    model=f"{project}/model/mnist/version/v0",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=[f"{project}/dataset/mnist/version/v0"],
    runtime=f"{project}/runtime/pytorch",
    overwrite_specs={"mnist.evaluator:MNISTInference.evaluate": {"resources": {"memory": "4GiB"}},
    "mnist.evaluator:MNISTInference.predict": {"resources": {"memory": "8GiB"}, "replicas": 10}}
    )
    print(job.status)
    • 创建一个 Standalone 实例的任务
    from starwhale import Job
    job = Job.create(
    project="self",
    model="mnist",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=["mnist"],
    )
    print(job.status)
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/sdk/model/index.html b/zh/0.6.0/reference/sdk/model/index.html index 93f1091c4..5809058dd 100644 --- a/zh/0.6.0/reference/sdk/model/index.html +++ b/zh/0.6.0/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 模型 SDK

    model.build

    model.build 是一个函数,能够构建 Starwhale 模型,等价于 swcli model build 命令。

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    参数

    • modules: (List[str|object], optional)
      • 构建时导入的模块,为列表类型,可以指定多个模块。
      • 模块类型包含两种:
        • 字符串类型: Python 可 Import 的路径,比如 "to.path.module", "to.path.module:object" 。
        • Python 对象: model.build 函数会自动解析所对应的模块。
      • 如果不指定,则会搜索当前已经导入的模块。
    • name: (str, optional)
      • Starwhale 模型的名称。
      • 若不指定,则会使用 cwd 目录名作为 Starwhale 模型的名称。
    • workdir: (str, Pathlib.Path, optional)
      • Starwhale 模型打包的根目录,此目录下的文件会被打包。
    • project_uri: (str, optional)
      • Project URI,表示该模型属于哪个项目。
      • 默认为 swcli project select 选择的项目。
    • desc: (str, optional)
      • 描述信息,默认为空。
    • remote_project_uri: (str, optional)
      • 其他实例的项目 URI,构建完 Starwhale 模型后,会被自动复制到远端实例中。
    • add_all: (bool, optional)
      • Starwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
      • 默认为 False
    • tags: (List[str], optional)
      • 用户自定义标签。
      • 不能指定 latest 和 ^v\d+$ 这两个 Starwhale 系统内建标签。

    使用示例

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/sdk/other/index.html b/zh/0.6.0/reference/sdk/other/index.html index 81a9aa221..0dd96557b 100644 --- a/zh/0.6.0/reference/sdk/other/index.html +++ b/zh/0.6.0/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    其他 SDK

    __version__

    Starwhale Python SDK 和 swcli 版本,是字符串常量。

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    init_logger 用来设置日志输出级别。默认为0

    • 0: 输出 errors 信息,traceback 呈现最近的1个堆栈。
    • 1: 输出 errors + warnings 信息,traceback 呈现最近的5个堆栈内容。
    • 2: 输出 errors + warnings + info 信息,traceback 呈现最多10个堆栈内容。
    • 3: 输出 errors + warnings + info + debug 信息,traceback 呈现最多100个堆栈内容。
    • >=4: 输出 errors + warnings + info + debug + trace 信息,traceback 呈现最多1000个堆栈内容。
    def init_logger(verbose: int = 0) -> None:

    login

    登录 server/cloud 实例,等价于 swcli instance login 命令。登录 Standalone 实例是无意义的。

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    参数

    • instance: (str, required)
      • server/cloud 实例的 http url。
    • alias: (str, optional)
      • 实例的别名,可以简化 Starwhale URI 中 instance部分。
      • 若不指定,则使用实例的 http url 中 hostname 部分。
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • username + password 与 token 两种方式只能选择一种登录实例。

    使用示例

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    登出 server/cloud 实例, 等价于 swcli instance logout 命令。登出 Standalone 实例是无意义的。

    def logout(instance: str) -> None:

    使用示例

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/sdk/overview/index.html b/zh/0.6.0/reference/sdk/overview/index.html index 2762aae21..ff96625a5 100644 --- a/zh/0.6.0/reference/sdk/overview/index.html +++ b/zh/0.6.0/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Python SDK 概览

    Starwhale 提供一系列的 Python SDK,帮助用户管理数据集、模型和评测等调用,使用 Starwhale Python SDK 能让您更好的完成 ML/DL 开发任务。

    • class PipelineHandler: 提供默认的模型评测过程定义,需要用户实现 predict 和 evaluate 函数。
    • class Context: 执行模型评测过程中传入的上下文信息,包括 Project、Task ID 等。
    • class Dataset: Starwhale 数据集类。
    • class starwhale.api.service.Service: 在线评测的基础类。
    • class Job: 提供 Job 相关的操作。
    • class Evaluation: 提供 Evaluation 的 log 和 scan 相关的操作。

    函数

    • @multi_classification: 修饰器,适用于多分类问题,用来简化 evaluate 结果的进一步计算和结果存储,能更好的呈现评测结果。
    • @handler: 修饰器,定义一个带有资源属性(mem/cpu/gpu)的运行实体,可以控制副本数。多个Handlers可以通过依赖关系,生成DAG,便于控制执行流程。
    • @evaluation.predict: 修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段。
    • @evaluation.evaluate: 修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段。
    • model.build: 进行 Starwhale 模型构建。
    • @fine_tune: 修饰器,定义模型训练的微调(fine-tune)过程。
    • init_logger: 设置日志输出级别,实现五种级别日志输出。
    • dataset: 获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。
    • @starwhale.api.service.api: 修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,实现在线评测。
    • login: 登录 server/cloud 实例。
    • logout: 登出 server/cloud 实例。
    • job: 根据Job URI获得 starwhale.Job 对象。
    • @PipelineHandler.run: 修饰器,定义 PipelineHandler 子类中 predict 和 evaluate 方法的资源。

    数据类型

    • COCOObjectAnnotation: 提供COCO类型的定义。
    • BoundingBox: 边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。
    • ClassLabel: 描述label的数量和类型。
    • Image: 图片类型。
    • GrayscaleImage: 灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。
    • Audio: 音频类型。
    • Video: 视频类型。
    • Text: 文本类型,默认为 utf-8 格式,用来存储大文本。
    • Binary: 二进制类型,用 bytes 存储,用来存储比较大的二进制内容。
    • Line: 直线类型。
    • Point: 点类型。
    • Polygon: 多边形类型。
    • Link: Link类型,用来制作 remote-link 类型的数据。
    • MIMEType: 描述 Starwhale 支持的多媒体类型,用在 Image、Video 等类型的 mime_type 属性上,能更好的进行 Dataset Viewer。

    其他

    • __version__: Starwhale Python SDK 和 swcli 版本,是字符串常量。

    进一步阅读建议

    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/sdk/type/index.html b/zh/0.6.0/reference/sdk/type/index.html index 81f55abb5..ac7cd3563 100644 --- a/zh/0.6.0/reference/sdk/type/index.html +++ b/zh/0.6.0/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 数据类型 SDK

    COCOObjectAnnotation

    提供COCO类型的定义。

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    参数说明
    idobject id,一般为全局object的递增id
    image_idimage id,一般为图片id
    category_idcategory id,一般为目标检测中类别的id
    segmentation物体轮廓表示,Polygon(多边形的点)或RLE格式
    areaobject面积
    bbox表示bounding box,可以为BoundingBox类型或float的列表
    iscrowd0表示是一个单独的object,1表示两个没有分开的object

    使用示例

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    提供灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width和Height,channel默认为1
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage函数

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    提供边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    参数说明
    xleft_x的坐标
    ytop_y的坐标
    width图片的宽度
    height图片的高度

    ClassLabel

    描述label的数量和类型。

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    图片类型。

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width、Height和channel
    mime_typeMIMEType支持的类型
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image函数

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    视频类型。

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp视频的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    音频类型。

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp音频文件的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio函数

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    文本类型,默认为 utf-8 格式。

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    参数说明
    contenttext内容
    encodingtext的编码格式

    使用示例

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text函数

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    二进制类型,用bytes存储。

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    参数说明
    fp路径、IO对象或文件内容的bytes
    mime_typeMIMEType支持的类型

    Binary函数

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link

    Link类型,用来制作 remote-link 类型的数据集。

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    参数说明
    uri原始数据的uri地址,目前支持localFS和S3两种协议
    authLink Auth信息
    offset数据相对uri指向的文件偏移量
    size数据大小
    data_typeLink指向的实际数据类型,目前支持 Binary, Image, Text, Audio 和 Video 类型

    Link函数

    Link.astype

    astype() -> Dict[str, t.Any]

    MIMEType

    描述Starwhale支持的多媒体类型,用Python Enum类型实现,用在 Image、Video 等类型的mime_type 属性上,能更好的进行Dataset Viewer。

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    Line

    描述直线。

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    描述点。

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    描述多边形。

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/dataset/index.html b/zh/0.6.0/reference/swcli/dataset/index.html index b7a1b96b1..4e6fbb88b 100644 --- a/zh/0.6.0/reference/swcli/dataset/index.html +++ b/zh/0.6.0/reference/swcli/dataset/index.html @@ -10,7 +10,7 @@ - + @@ -21,7 +21,7 @@ | --page | N | Integer | 1 | 起始页码,仅限Server和Cloud实例。 | | --size | N | Integer | 20 | 一页中的数据集数量,仅限Server和Cloud实例。 | | --filter-fl | N | String | | 仅显示符合条件的数据集。该选项可以在一个命令中被多次重复使用。 |

    过滤器类型说明范例
    nameKey-Value数据集名称前缀--filter name=mnist
    ownerKey-Value数据集所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli dataset recover

    swcli [全局选项] dataset recover [选项] <DATASET>

    dataset recover 恢复以前删除的Starwhale数据集或版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 数据集或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale数据集或版本会被强制覆盖。

    swcli dataset remove

    swcli [全局选项] dataset remove [选项] <DATASET>

    dataset remove 删除指定的 Starwhale 数据集或某个版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则删除指定数据集的所有版本。软删除的 Starwhale 数据集,可以通过 swcli dataset recover 命令进行恢复(未进行垃圾回收)。

    被删除的Starwhale数据集或版本可以通过 swcli dataset list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale数据集或版本。删除后不可恢复。

    swcli dataset summary

    swcli [全局选项] dataset summary <DATASET>

    显示数据集摘要信息。DATASET 是一个数据集URI

    swcli dataset tag

    swcli [全局选项] dataset tag [选项] <DATASET> [TAGS]...

    dataset tag 将标签附加到指定的Starwhale数据集版本,同时支持删除和列出所有标签的功能。可以在数据集URI中使用标签替代版本ID。

    DATASET是一个数据集URI

    每个数据集版本可以包含任意数量的标签,但同一数据集中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的数据集已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    数据集标签的例子

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/index.html b/zh/0.6.0/reference/swcli/index.html index 3471fcc98..83e12b3aa 100644 --- a/zh/0.6.0/reference/swcli/index.html +++ b/zh/0.6.0/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    概述

    使用方式

    swcli [选项] <COMMAND> [参数]...
    备注

    swcliswstarwhale三个命令的作用是一样的。

    全局选项

    选项说明
    --version显示swcli的版本信息。
    --verbose-v日志中输出更多信息,当 -v 参数越多,呈现信息越多,最多支持4个 -v 参数。
    --help输出命令帮助信息。
    警告

    需要注意的是,全局参数需要跟在swcli之后,命令之前。

    命令

    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/instance/index.html b/zh/0.6.0/reference/swcli/instance/index.html index 0296191d7..451ad0154 100644 --- a/zh/0.6.0/reference/swcli/instance/index.html +++ b/zh/0.6.0/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    swcli instance

    概述

    swcli [全局选项] instance [选项] <SUBCOMMAND> [参数]

    instance命令包括以下子命令:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [全局选项] instance info [选项] <INSTANCE>

    instance info 输出指定 Starwhale 实例的详细信息。

    INSTANCE 是一个实例URI

    swcli instance list

    swcli [全局选项] instance list [选项]

    instance list 显示所有的 Starwhale 实例。

    swcli instance login

    swcli [全局选项] instance login [选项] <INSTANCE>

    instance login 连接到一个 Server/Cloud 实例并将它设置为默认实例.

    INSTANCE 是一个实例URI

    选项必填项类型默认值说明
    --usernameNString登录用户名
    --passwordNString登录密码
    --tokenNString登录令牌
    --aliasYString实例别名。您可以在任何需要实例URI的地方使用对应的别名替代。

    --username--password 不能和 --token 一起使用。

    swcli instance logout

    swcli [全局选项] instance logout [INSTANCE]

    instance logout 断开和 Server/Cloud 实例的连接并清除本地保存的信息。

    INSTANCE是一个实例URI。如果不指定,将使用默认实例

    swcli instance use

    swcli [全局选项] instance use <INSTANCE>

    instance use 将指定的实例设置为默认实例.

    INSTANCE 是一个实例URI

    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/job/index.html b/zh/0.6.0/reference/swcli/job/index.html index 295a8ec28..b3731a86c 100644 --- a/zh/0.6.0/reference/swcli/job/index.html +++ b/zh/0.6.0/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    swcli job

    概述

    swcli [全局选项] job [选项] <子命令> [参数]...

    job命令包括以下子命令:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [全局选项] job cancel [选项] <JOB>

    job cancel 停止指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job info

    swcli [全局选项] job info [选项] <JOB>

    job info 输出指定作业的详细信息。

    JOB 是一个作业URI

    swcli job list

    swcli [全局选项] job list [选项]

    job list显示所有的 Starwhale 作业。

    选项必填项类型默认值说明
    --projectNString要查看的项目的 URI。如果未指定此选项,则使用默认项目替代。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的作业。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的作业数。仅限 Server 和 Cloud 实例。

    swcli job pause

    swcli [全局选项] job pause [选项] <JOB>

    job pause 暂停指定的作业. 被暂停的作业可以使用 job resume 恢复。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    pausecancel 功能上基本相同。它们的差别在于被暂停的作业会保留作业ID,在恢复时继续使用。作业的开发者需要定期保存作业数据并在恢复的时候重新加载相关数据。作业ID 可以用作保存数据的键值。

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job resume

    swcli [全局选项] job resume [选项] <JOB>

    job resume 恢复指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/model/index.html b/zh/0.6.0/reference/swcli/model/index.html index e98b09726..c3f9f9350 100644 --- a/zh/0.6.0/reference/swcli/model/index.html +++ b/zh/0.6.0/reference/swcli/model/index.html @@ -10,14 +10,14 @@ - +
    版本:0.6.0

    swcli model

    概述

    swcli [全局选项] model [选项] <SUBCOMMAND> [参数]...

    model命令包括以下子命令:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [全局选项] model build [选项] <WORKDIR>

    model build 会将整个 WORKDIR 打包到Starwhale模型中,.swignore匹配的文件除外。

    model build 会导入 --module 参数指定的模块,然后生成运行模型所需要的配置。如果您指定的模块依赖第三方库,我们强烈建议您使用 --runtime 选项。如果不指定该选项,您需要确保 swcli 所使用的 Python 环境已经安装了相关的依赖库。

    选项必填项类型默认值说明
    --project-pNString默认项目项目URI
    --model-yaml-fNString${workdir}/model.yamlmodel.yaml 文件路径,默认会尝试使用 ${workdir}/model.yaml 文件。model.yaml 对于模型构建并非必需的。
    --module-mNString构建时导入的模块。Starwhale 会将这些模块中包含的 handler 导出到模型包。该参数可以指定多次,用来导入多个 Python 模块。
    --runtimeNString运行此命令时使用的 Starwhale Runtime的URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --name-nNString模型包的名字
    --desc-dNString模型包的描述
    --package-runtime--no-package-runtimeNBooleanTrue当使用 --runtime 参数时,默认情况下,会将对应的 Starwhale 运行时变成 Starwhale 模型的内置运行时。可以通过 --no-package-runtime 参数禁用该特性。
    --add-allNBooleanFalseStarwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
    -t--tagN全局String

    Starwhale 模型构建的例子

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [全局选项] model copy [选项] <SRC> <DEST>

    model copy 将模型从 SRC 复制到 DEST,用来实现不同实例的模型分享。这里 SRCDEST 都是模型URI

    Starwhale 模型复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 模型复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [全局选项] model diff [选项] <MODEL VERSION> <MODEL VERSION>

    model diff 比较同一模型的两个版本之间的差异。

    MODEL VERSION 是一个模型URI

    选项必填项类型默认值说明
    --show-detailsNBooleanFalse使用该选项输出详细的差异信息。

    swcli model extract

    swcli [全局选项] model extract [选项] <MODEL> <TARGET_DIR>

    model extract 能够将 Starwhale 模型解压到指定目录中,方便进行后续改造。

    MODEL 是一个模型URI

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,会强制覆盖目标目录已经存在的模型解压文件。

    Starwhale 模型解压的例子

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [全局选项] model history [选项] <MODEL>

    model history输出指定Starwhale模型的所有历史版本。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli model info

    swcli [全局选项] model info [选项] <MODEL>

    model info输出指定Starwhale模型版本的详细信息。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basic设置输出的过滤规则,比如只显示Model的model.yaml。目前该参数仅对Standalone Instance的Model生效。

    Starwhale 模型信息查看的例子

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [全局选项] model list [选项]

    model list显示所有的Starwhale模型。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的模型。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的模型数。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的模型。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value模型名称前缀--filter name=mnist
    ownerKey-Value模型所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli model recover

    swcli [全局选项] model recover [选项] <MODEL>

    model recover 恢复以前删除的 Starwhale 模型或版本。

    MODEL是一个模型URI。如果 URI 不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 模型或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的 Starwhale 模型或版本会被强制覆盖。

    swcli model remove

    swcli [全局选项] model remove [选项] <MODEL>

    model remove 删除指定的 Starwhale 模型或某个版本。

    MODEL 是一个模型URI。如果URI不包含版本,则删除指定模型的所有版本。

    被删除的 Starwhale 模型或版本可以在垃圾回收之前通过 swcli model recover 恢复。要永久删除某个Starwhale模型或版本,您可以使用 --force 选项。

    被删除的 Starwhale 模型或版本可以通过 swcli model list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale模型或版本。删除后不可恢复。

    swcli model run

    swcli [全局选项] model run [选项]

    model run 运行一个模型的 Handler。该命令提供两种模式: model URI模式和本地开发模式。 model URI模式需要一个预先构建好的模型包,本地开发模式仅需要 model 代码目录即可。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在model URI模式中使用,指定 model URI。
    --handler-hNString运行的Handler索引或名字,默认运行第一个Handler。格式为序号或Handler的名字。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model run 是非必须的。
    --run-project-pNString默认的 ProjectProject URI,表示 model run 的结果存储到对应的项目中。
    --dataset-dNStringDataset URI,模型运行所需要的 Starwhale 数据集。该参数可以被设置多次。
    --dataset-head-dhNInteger0在 Standalone Instance 下,用于调试目的,一般只需要运行数据集的一部分数据即可,可以通过 --dataset-head 参数来设定。默认值为0,表示会使用数据集所有数据。
    --in-containerNBooleanFalse使用docker镜像来运行模型。此选项仅适用于 Standalone 实例。Server 和 Cloud 实例始终使用 docker 镜像。如果指定的 runtime 是基于 docker 镜像构建的,此选项总是为真。
    --forbid-snapshot-fsNBooleanFalse当在model URI模式下,每次模型运行,都会使用一个全新的快照目录,设置该参数后直接使用模型的 workdir 目录作为运行目录。本地开发模式下,此参数不生效,每次运行都是在 --workdir 指定的目录中。
    -- --user-arbitrary-argsNString你在handlers中预设的参数 赋值.

    Starwhale 模型运行的例子

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    # --> run with dataset of head 10
    swcli model run --uri mnist --dataset-head 10 --dataset mnist

    swcli model serve

    swcli [全局选项] model serve [选项]

    model serve 命令可以以Web Server方式运行模型,并提供简易的 Web 交互界面。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在 model URI模式中使用,指定 model URI。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model serve 是非必须的。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --hostNString127.0.0.1服务监听的地址
    --portNInteger8080服务监听的端口

    Starwhale 模型 Serving 的例子

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [全局选项] model tag [选项] <MODEL> [TAGS]...

    model tag将标签附加到指定的Starwhale模型版本,同时支持删除和列出所有标签的功能。可以在模型URI中使用标签替代版本ID。

    MODEL是一个模型URI

    每个模型版本可以包含任意数量的标签,但同一模型中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的模型已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 模型标签的例子

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/project/index.html b/zh/0.6.0/reference/swcli/project/index.html index 330dad959..5b8ed9027 100644 --- a/zh/0.6.0/reference/swcli/project/index.html +++ b/zh/0.6.0/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    swcli project

    概述

    swcli [全局选项] project [选项] <子命令> [参数]...

    project命令包括以下子命令:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [全局选项] project create <PROJECT>

    project create 创建一个新的项目。

    PROJECT 是一个项目URI

    swcli project info

    swcli [全局选项] project info [选项] <PROJECT>

    project info 输出指定项目的详细信息。

    PROJECT 是一个项目URI

    swcli project list

    swcli [全局选项] project list [选项]

    project list 显示所有的项目。

    选项必填项类型默认值说明
    --instanceNString要显示的实例 URI。如果不指定该选项,则显示默认实例.
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的项目。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的项目数。仅限 Server 和 Cloud 实例。

    swcli project recover

    swcli [全局选项] project recover [选项] <PROJECT>

    project recover 恢复以前删除的项目。

    PROJECT 是一个项目URI

    已经被垃圾回收或者使用 --force 选项删除的项目无法使用本命令恢复。

    swcli project remove

    swcli [全局选项] project remove [选项] <PROJECT>

    project remove 删除指定的项目。

    PROJECT 是一个项目URI

    被删除的项目可以在垃圾回收之前通过 swcli project recover 恢复。要永久删除某个项目,您可以使用 --force 选项。

    被删除的项目可以通过 swcli project list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 模型或版本。删除后不可恢复。

    swcli project use

    swcli [全局选项] project use <PROJECT>

    project use 将指定的项目设置为默认项目。如果要指定 Server/Cloud 实例上的项目,您需要先登录才能运行本命令。

    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/runtime/index.html b/zh/0.6.0/reference/swcli/runtime/index.html index 54752fcea..5e529a716 100644 --- a/zh/0.6.0/reference/swcli/runtime/index.html +++ b/zh/0.6.0/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    swcli runtime

    概述

    swcli [全局选项] runtime [选项] <SUBCOMMAND> [参数]...

    runtime 命令包括以下子命令:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [全局选项] runtime activate [选项] <RUNTIME>

    runtime activate 根据指定的运行时创建一个全新的 Python 环境,类似 source venv/bin/activateconda activate xxx 的效果。关闭当前 shell 或切换到其他 shell 后,需要重新激活 Runtime。URI 参数为 Runtime URI。

    对于已经激活的 Starwhale 运行时,如果想要退出该环境,需要在 venv 环境中执行 deactivate 命令或conda环境中执行 conda deactivate 命令。

    runtime activate 命令首次激活环境的时候,会根据 Starwhale 运行时的定义,构建一个 Python 隔离环境,并下载相关的 Python Packages ,可能会花费比较长的时间。

    swcli runtime build

    swcli [全局选项] runtime build [选项]

    runtime build 命令可以从多种环境或 runtime.yaml ,构建一个可以分享、可以复现的适合 ML/DL 领域的运行环境。

    参数说明

    • 运行时构建方式的相关参数:
    选项必填项类型默认值说明
    -c--condaNString通过 conda env name 寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -cp--conda-prefixNString通过 conda env prefix 路径寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -v--venvNString通过 venv 目录地址寻找对应的 venv 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -s--shellNString根据当前 shell 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -y--yamlNcwd 目录的 runtime.yaml根据用户自定义的 runtime.yaml 构建 Starwhale 运行时。
    -d--dockerNString将 docker image 作为 Starwhale 运行时。

    运行时构建方式的相关参数是互斥的,只能指定一种方式,如果不指定,则会采用 --yaml 方式读取 cwd 目录下的 runtime.yaml 文件进行 Starwhale 运行时的构建。

    • 其他参数:
    选项必填项作用域类型默认值说明
    --project-pN全局String默认项目项目URI
    -del--disable-env-lockNruntime.yaml 模式BooleanFalse是否安装 runtime.yaml 中的依赖,并锁定相关依赖的版本信息。默认会锁定依赖。
    -nc--no-cacheNruntime.yaml 模式BooleanFalse是否删除隔离环境,全新安装相关依赖。默认会在之前的隔离环境中安装依赖。
    --cudaNconda/venv/shell 模式Choice[11.3/11.4/11.5/11.6/11.7/]CUDA 版本,默认不使用 CUDA。
    --cudnnNconda/venv/shell 模式Choice[8/]cuDNN 版本,默认不使用 cuDNN。
    --archNconda/venv/shell 模式Choice[amd64/arm64/noarch]noarch体系结构
    -dpo--dump-pip-optionsN全局BooleanFalse~/.pip/pip.conf 导出 pip 的配置参数。
    -dcc--dump-condarcN全局BooleanFalse~/.condarc 导出 conda 的配置参数。
    -t--tagN全局String用户自定义标签,可以指定多次。

    Starwhale 运行时构建的例子

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [全局选项] runtime copy [选项] <SRC> <DEST>

    runtime copy 将 runtime 从 SRC 复制到 DEST,可以实现不同实例之间的运行时分享。这里 SRCDEST 都是运行时URI

    Starwhale 运行时复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 运行时复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [全局选项] runtime dockerize [选项] <RUNTIME>

    runtime dockerize 基于指定的 runtime 创建一个 docker 镜像。Starwhale 使用 docker buildx 来创建镜像。运行此命令需要预先安装 Docker 19.03 以上的版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --tag or -tNStringDocker镜像的tag,该选项可以重复多次。
    --pushNBooleanFalse是否将创建的镜像推送到docker registry。
    --platformNStringamd64镜像的运行平台,可以是amd64或者arm64。该选项可以重复多次用于创建多平台镜像。
    --dry-runNBooleanFalse只生成 Dockerfile 不实际生成和推送镜像。

    swcli runtime extract

    swcli [全局选项] runtime extract [选项] <RUNTIME>

    Starwhale 运行时以压缩包的方式分发,使用 runtime extract 命令可以解压运行时 Package,然后进行后续的自定义修改。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果目标目录已经有解压好的 Starwhale 运行时,是否删除后重新解压。
    --target-dirNString自定义解压的目录,如果不指定则会放到 Starwhale 默认的运行时 workdir 目录中,命令输出日志中会提示。

    swcli runtime history

    swcli [全局选项] runtime history [选项] <RUNTIME>

    runtime history输出指定Starwhale运行时的所有历史版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli runtime info

    swcli [全局选项] runtime info [选项] <RUNTIME>

    runtime info输出指定Starwhale运行时版本的详细信息。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/runtime_yaml/manifest/lock/all]basic设置输出的过滤规则,比如只显示Runtime的runtime.yaml。目前该参数仅对Standalone Instance的Runtime生效。

    Starwhale 运行时查看详情的例子

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [全局选项] runtime list [选项]

    runtime list显示所有的 Starwhale 运行时。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的运行时。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的运行时数量。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的运行时。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value运行时名称前缀--filter name=pytorch
    ownerKey-Value运行时所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli runtime recover

    swcli [全局选项] runtime recover [选项] <RUNTIME>

    runtime recover 命令可以恢复以前删除的 Starwhale 运行时。

    RUNTIME是一个运行时URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 运行时或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale运行时或版本会被强制覆盖。

    swcli runtime remove

    swcli [全局选项] runtime remove [选项] <RUNTIME>

    runtime remove 命令可以删除指定的 Starwhale 运行时或某个版本。

    RUNTIME 是一个运行时URI。如果 URI 不包含版本,则删除所有版本。

    被删除的 Starwhale 运行时或版本可以在垃圾回收之前通过 swcli runtime recover 命令恢复。要永久删除某个 Starwhale 运行时或版本,您可以使用 --force 选项。

    被删除的 Starwhale 运行时或版本可以通过 swcli runtime list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 运行时或版本。删除后不可恢复。

    swcli runtime tag

    swcli [全局选项] runtime tag [选项] <RUNTIME> [TAGS]...

    runtime tag 命令将标签附加到指定的 Starwhale 运行时版本,同时支持删除和列出所有标签的功能。可以在运行时URI中使用标签替代版本 ID。

    RUNTIME 是一个运行时URI

    每个运行时版本可以包含任意数量的标签,但同一运行时中不允许有重复的标签名称。

    runtime tag仅适用于 Standalone 实例.

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的运行时已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 运行时标签的例子

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/zh/0.6.0/reference/swcli/utilities/index.html b/zh/0.6.0/reference/swcli/utilities/index.html index cb707cf11..ea3ed441b 100644 --- a/zh/0.6.0/reference/swcli/utilities/index.html +++ b/zh/0.6.0/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    其他命令

    swcli gc

    swcli [全局选项] gc [选项]

    gc根据内部的垃圾回收策略清理已经被删除的项目、模型、数据集和运行时。

    选项必填项类型默认值说明
    --dry-runNBooleanFalse如果为真,仅输出将被删除的对象而不清理。
    --yesNBooleanFalse跳过所有需要确认的项目。

    swcli check

    swcli [全局选项] check

    检查 swcli 命令的外部依赖是否满足条件,目前主要检查 Docker 和 Conda。

    swcli completion install

    swcli [全局选项] completion install <SHELL_NAME>

    安装 swcli 命令补全,目前支持 bash, zsh 和 fish。如果不指定 SHELL_NAME,则尝试主动探测当前shell类型。

    swcli config edit

    swcli [全局选项] config edit

    编辑 Starwhale 配置文件,即 ~/.config/starwhale/config.yaml

    swcli ui

    swcli [全局选项] ui <INSTANCE>

    打开对应实例的Web页面。

    - + \ No newline at end of file diff --git a/zh/0.6.0/runtime/index.html b/zh/0.6.0/runtime/index.html index 4c1b85081..1266e2916 100644 --- a/zh/0.6.0/runtime/index.html +++ b/zh/0.6.0/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 运行时

    overview

    概览

    Starwhale 运行时能够针对运行Python程序,提供一种可复现、可分享的运行环境。使用 Starwhale 运行时,可以非常容易的与他人分享,并且能在 Starwhale Server 和 Starwhale Cloud 实例上使用 Starwhale 运行时。

    Starwhale 运行时使用 venv, conda 和 docker 等基础技术,如果您当前正在使用这些技术,可以非常容易的将这个环境转化为 Starwhale 运行时。

    对于本地环境,Starwhale 运行时支持非常容易的多种环境管理和切换。Starwhale 运行时包含基础镜像和环境依赖两个部分。

    基础镜像

    Starwhale 基础镜像中会安装 Python, CUDA, cuDNN 和其他一些机器学习开发中必要的基础库。Starwhale 运行时提供多种基础镜像供选择,列表如下:

    • 体系结构:
      • X86 (amd64)
      • Arm (aarch64)
    • 操作系统:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7

    runtime.yaml 通过相关设置来决定使用何种基础镜像。

    - + \ No newline at end of file diff --git a/zh/0.6.0/runtime/yaml/index.html b/zh/0.6.0/runtime/yaml/index.html index 84756ed7c..0a5b447d6 100644 --- a/zh/0.6.0/runtime/yaml/index.html +++ b/zh/0.6.0/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    runtime.yaml 使用指南

    runtime.yaml 是构建 Starwhale 运行时的描述文件,用户可以细粒度的定义 Starwhale 运行时的各种属性。当使用 swcli runtime build 命令中 yaml 模式时,需要提供 runtime.yaml 文件。

    使用示例

    最简示例

    dependencies:
    - pip:
    - numpy
    name: simple-test

    定义一个以 venv 作为Python 包隔离方式,安装numpy依赖的 Starwhale 运行时。

    llama2 示例

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    完整字段示例

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be installed with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/zh/0.6.0/server/guides/server_admin/index.html b/zh/0.6.0/server/guides/server_admin/index.html index ac8a382bb..f407ff862 100644 --- a/zh/0.6.0/server/guides/server_admin/index.html +++ b/zh/0.6.0/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    版本:0.6.0

    Starwhale Server 系统设置

    超级管理员密码重置

    一旦您忘记了超级管理员的密码, 您可以通过下面的SQL语句将密码重置为 abcd1234

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    重置后,您可以使用上述密码登录到console。 然后再次修改密码为您想要的密码。

    系统设置

    您可以在 Starwhale Server Web 界面中对系统设置进行更改,目前支持runtime的docker镜像源修改以及资源池的划分等。下面是系统设置的一个例子:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    镜像源设置

    Server 下发的 Tasks 都是基于 docker 实现的,Starwhale Server 支持自定义镜像源,包括 dockerSetting.registryForPush 和 dockerSetting.registryForPull。

    资源池设置

    资源池实现了集群机器分组的功能。用户在创建任务时可以通过选择资源池将自己的任务下发到想要的机器组中。资源池可以理解为 Kubernetes 中的 nodeSelector,所以当您在K8S集群中给机器打上标签后,就可以在这里配置您的 resourcePool

    存储设置

    您可以通过存储设置来配置 Starwhale Server 可以访问哪些存储介质:

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload

    每一个 storageSetting 条目都应该有一个StorageAccessService接口的实现. Starwhale目前有四个内置的实现:

    • StorageAccessServiceAliyun 可以处理 type 为 aliyun 或者 oss 的条目
    • StorageAccessServiceMinio 可以处理 type 为 minio 的条目
    • StorageAccessServiceS3 可以处理 type 为 s3 的条目
    • StorageAccessServiceFile 可以处理 type 为 fs 或者 file 的条目

    不同的实现对 tokens 的要求是不一样的。当 type 为 aliyun、minio 或者 oss 的时候 endpoint 是必填的。当 endpoint 为空并且 type 为 s3 的时候 region 是必填的。而 fs/file 类型的存储则需要 rootDir 和 serviceProvider 作为 tokens 的 key。更多细节请参阅代码。

    - + \ No newline at end of file diff --git a/zh/0.6.0/server/index.html b/zh/0.6.0/server/index.html index 4521533f3..b030acfae 100644 --- a/zh/0.6.0/server/index.html +++ b/zh/0.6.0/server/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/0.6.0/server/installation/docker-compose/index.html b/zh/0.6.0/server/installation/docker-compose/index.html index 78ea34e5c..a84baf7ce 100644 --- a/zh/0.6.0/server/installation/docker-compose/index.html +++ b/zh/0.6.0/server/installation/docker-compose/index.html @@ -10,14 +10,14 @@ - +
    版本:0.6.0

    使用Docker Compose安装Starwhale

    先决条件

    安装方法

    启动服务

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    GLOBAL_IP 需要是可以被所有 swcli 访问到的,包括用户实际使用的swcli和container内部的swcli. 如果不能访问,请确认您的防火墙设置.

    compose.yaml 包含了Mysql数据库,MinIO存储和Controller服务. 创建一个 compose.override.yaml, 可以覆盖 compose.yaml 中的配置. 如何配置可以参考此处

    - + \ No newline at end of file diff --git a/zh/0.6.0/server/installation/docker/index.html b/zh/0.6.0/server/installation/docker/index.html index c8965dcde..3ff8bcfbf 100644 --- a/zh/0.6.0/server/installation/docker/index.html +++ b/zh/0.6.0/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    使用 Docker 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。

    请确保您的Kubernetes集群上的pod可以访问Starwhale Server侦听的端口。

    为Docker准备env文件

    Starwhale Server可以通过环境变量进行配置。

    Docker的env文件模板参考此处。您可以通过修改模板来创建自己的env文件。

    准备kubeconfig文件[可选][SW_SCHEDULER=k8s]

    kubeconfig文件用于访问Kubernetes集群。 有关kubeconfig文件的更多信息,请参阅官方Kubernetes文档

    如果您安装了kubectl命令行工具,可以运行 kubectl config view 来查看您当前的配置。

    启动Docker镜像

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<您的kubeconfig文件路径>,destination=/root/.kube/config,readonly \
    --env-file <您的env文件路径> \
    docker-registry.starwhale.cn/star-whale/server:0.5.6

    对于非中国大陆网络用户,可以使用托管在 ghcr.io 上的镜像: ghcr.io/star-whale/server

    - + \ No newline at end of file diff --git a/zh/0.6.0/server/installation/helm-charts/index.html b/zh/0.6.0/server/installation/helm-charts/index.html index e93f6d44b..44fc7b67b 100644 --- a/zh/0.6.0/server/installation/helm-charts/index.html +++ b/zh/0.6.0/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    使用 Helm 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。
    • Helm 3.2.0+。

    Starwhale Helm charts 包括 MySQL 和 MinIO 作为依赖项。如果您没有自己的 MySQL 实例或任何与 AWS S3 兼容的对象存储可用,可以通过 Helm Charts 进行安装。请查看下文的安装选项以了解如何在安装 Starwhale Server 的同时安装 MySQL 和 MinIO。

    在 Kubernetes 上为 Starwhale Server 创建一个服务账号

    如果您的 Kubernetes 集群启用了 RBAC(在 Kubernetes 1.6+中,默认启用 RBAC),Starwhale Server 将无法正常工作,除非由至少具有以下权限的服务帐户启动:

    | Resource | API Group | Get | List | Watch | Create | Delete |
    |----------|-----------|-----|------|-------|--------|--------|
    | jobs     | batch     | Y   | Y    | Y     | Y      | Y      |
    | pods     | core      | Y   | Y    | Y     |        |        |
    | nodes    | core      | Y   | Y    | Y     |        |        |
    | events   | ""        | Y   |      |       |        |        |

    例子:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    下载 Starwhale Helm chart

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    安装Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    如果您安装了kubectl命令行工具,您可以运行 kubectl get pods -n starwhale 来检查是否所有 pod 都在正常运行中。

    更新 Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    卸载 Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/zh/0.6.0/server/installation/index.html b/zh/0.6.0/server/installation/index.html index ea8264386..f10b0442e 100644 --- a/zh/0.6.0/server/installation/index.html +++ b/zh/0.6.0/server/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale Server 安装指南

    Starwhale Server 以 Docker 镜像的形式发布。您可以直接使用 Docker 运行,也可以部署到 Kubernetes 集群上。

    - + \ No newline at end of file diff --git a/zh/0.6.0/server/installation/minikube/index.html b/zh/0.6.0/server/installation/minikube/index.html index 50fbe0cfc..079cb55f9 100644 --- a/zh/0.6.0/server/installation/minikube/index.html +++ b/zh/0.6.0/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    使用 Minikube 安装 Starwhale Server

    先决条件

    启动 Minikube

    minikube start --addons ingress --image-mirror-country=cn --kubernetes-version=1.25.3

    对于非中国大陆网络用户,可以省略 --image-mirror-country=cn 参数。另外,如果在您的机器上没有安装 kubectl,可以使用 Minikube 自带的 kubectl: minikube kubectl 或 bashrc中增加 alias kubectl="minikube kubectl --"

    安装 Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    对于非中国大陆网络用户,可以使用 values.minikube.global.yaml,命令如下:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    当成功安装后,会有类似如下的提示信息输出:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    检查 Starwhale Server 状态

    Minikube 方式启动 Starwhale Server 一般要用时3-5分钟,可以执行如下命令检查是否完成启动:

    kubectl get deployments -n starwhale
    NAME         READY   UP-TO-DATE   AVAILABLE   AGE
    controller   1/1     1            1           5m
    minio        1/1     1            1           5m
    mysql        1/1     1            1           5m

    本机访问的网络配置

    执行如下命令后,就可以在浏览器中通过 http://controller.starwhale.svc 访问 Starwhale Server:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    其他机器访问的网络配置

    • 步骤1: 在 Starwhale Server 所在机器上

      使用 socat 命令做临时的端口转发,命令如下:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      当您停掉socat进程后,端口转发会被禁止,其他机器的访问也会被禁止。如果想长期开启端口转发,可以使用 iptables 命令。

    • 步骤2: 在其他机器上

      在 hosts 文件添加相关域名映射,命令如下:

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/zh/0.6.0/server/installation/starwhale_env/index.html b/zh/0.6.0/server/installation/starwhale_env/index.html index 88f8a63fc..e7bd689a9 100644 --- a/zh/0.6.0/server/installation/starwhale_env/index.html +++ b/zh/0.6.0/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 环境变量文件示例

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################

    # 用于缓存WAL文件的目录。请将其指向一个有足够空间的挂载卷或主机路径。
    # 如果不设置,WAL文件将保存在docker运行时层,当容器重启时cache数据将丢失。
    SW_DATASTORE_WAL_LOCAL_CACHE_DIR=
    - + \ No newline at end of file diff --git a/zh/0.6.0/server/project/index.html b/zh/0.6.0/server/project/index.html index dda7c82ed..3ae233d68 100644 --- a/zh/0.6.0/server/project/index.html +++ b/zh/0.6.0/server/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    How to Organize and Manage Resources with Starwhale Projects

    Project is the basic unit for organizing and managing resources (such as models, datasets, runtime environments, etc.). You can create and manage projects based on your needs. For example, you can create projects by business team, product line, or models. One user can create and participate in one or more projects.

    Project type

    There are two types of projects:

    • Private project: The project (and related resources in the project) is only visible to project members with permission. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    • Public project: The project (and related resources in the project) is visible to all Starwhale users. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    Create a project

    1. Click the Create button in the upper right corner of the project list page;
    2. Enter a name for the project. Pay attention to avoiding duplicate names. For more information, please see Names in Starwhale
    3. Select the Project Type, which is defaulted to private project and can be selected as public according to needs;
    4. Fill in the description content;
    5. To finish, Click the Submit button.

    Edit a project

    The name, privacy and description of a project can be edited.

    1. Go to the project list page and find the project that needs to be edited by searching for the project name, then click the Edit Project button;
    2. Edit the items that need to be edited;
    3. Click Submit to save the edited content;
    4. If you're editing multiple projects, repeat steps 1 through 3.

    View a project

    My projects

    On the project list page, only my projects are displayed by default. My projects refer to the projects participated in by the current users as project members or project owners.

    Project sorting

    On the project list page, all projects are supported to be sorted by "Recently visited", "Project creation time from new to old", and "Project creation time from old to new", which can be selected according to your needs.

    Delete a project

    Once a project is deleted, all related resources (such as datasets, models, runtimes, evaluations, etc.) will be deleted and cannot be restored.

    1. Enter the project list page and search for the project name to find the project that needs to be deleted. Hover your mouse over the project you want to delete, then click the Delete button;
    2. Follow the prompts, enter the relevant information, click Confirm to delete the project, or click Cancel to cancel the deletion;
    3. If you are deleting multiple projects, repeat the above steps.

    Manage project member

    Only users with the admin role can assign people to the project. The project owner defaulted to having the project owner role.

    Add a member

    1. Click Manage Members to go to the project member list page;
    2. Click the Add Member button in the upper right corner.
    3. Enter the Username you want to add, select a project role for the user in the project.
    4. Click submit to complete.
    5. If you're adding multiple members, repeat steps 1 through 4.

    Remove a member

    1. On the project list page or project overview tab, click Manage Members to go to the project member list page.
    2. Search for the username you want to delete, then click the Delete button.
    3. Click Yes to delete the user from this project, click No to cancel the deletion.
    4. If you're removing multiple members, repeat steps 1 through 3.

    Edit a member's role

    1. Hover your mouse over the project you want to edit, then click Manage Members to go to the project member list page.
    2. Find the username you want to adjust through searching, click the Project Role drop-down menu, and select a new project role. For more information on roles, please take a look at Roles and permissions in Starwhale.
    - + \ No newline at end of file diff --git a/zh/0.6.0/swcli/config/index.html b/zh/0.6.0/swcli/config/index.html index 589183e41..1eac79476 100644 --- a/zh/0.6.0/swcli/config/index.html +++ b/zh/0.6.0/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    配置文件

    Standalone Instance 是安装在用户的笔记本或开发服务器上,以Linux/Mac用户为粒度进行隔离。用户通过 pip 命令安装 Starwhale Python package 并执行任意 swcli 命令后,就可以在 ~/.config/starwhale/config.yaml 中查看该用户的 Starwhale 配置。绝大多数情况下,用户不需要手工修改config.yaml文件

    ~/.config/starwhale/config.yaml 文件权限为 0o600,由于里面存有密钥信息,不建议用户修改该文件权限。您可以通过 swcli config edit 来修改配置:

    swcli config edit

    config.yaml 例子

    典型的 config.yaml 文件内容如下:

    • 当前默认默认 Instance 为 local。
    • cloud-cn/cloud-k8s/pre-k8s 三个为 Cloud Instance,local 为 Standalone Instance。
    • Standalone 本地存储的根目录为 /home/liutianwei/.starwhale
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml 字段说明

    参数说明类型默认值是否必须
    current_instance默认使用的instance名字,一般用 swcli instance select 命令设置Stringself
    instances管理的 Instances,包括 Standalone, Server 和 Cloud Instance,至少会有 Standalone Instance(名称为local),Server/Cloud Instance有一个或多个,一般用 swcli instance login 登陆一个新的instance,swcli instance logout 退出一个instanceDictStandalone Instance,名称为local
    instances.{instance-alias-name}.sw_token登陆Token,只对Server/Cloud Instance生效,后续swcli对Server/Cloud Instance进行操作时都会使用该Token。需要注意Token有过期时间,默认1个月,可以在Server/Cloud Instance中进行设置StringCloud-是,Standalone-否
    instances.{instance-alias-name}.typeinstance类型,目前只能填写 cloudstandaloneChoice[String]
    instances.{instance-alias-name}.uri对于Server/Cloud Instance,uri是http/https地址,对于Standalone Instance,uri是 localString
    instances.{instance-alias-name}.user_name用户名String
    instances.{instance-alias-name}.current_project当前Instance下默认的Project是什么,在URI的表述中会作为project字段进行默认填充,可以通过 swcli project select 命令进行设置String
    instances.{instance-alias-name}.user_role用户角色Stringnormal
    instances.{instance-alias-name}.updated_at该条Instance配置更新时间时间格式字符串
    storage与本地存储相关的设置Dict
    storage.rootStandalone Instance本地存储的根目录。通常情况下,当home目录空间不足,手工把数据文件移动到其他位置时,可以修改该字段String~/.starwhale
    versionconfig.yaml的版本,目前仅支持2.0String2.0

    Standalone Instance 的文件存储结构

    ${storage.root} 目录中存储了 Standalone Instance 所有的用户数据,包括 Project、Runtime、Model、Dataset、Evaluation 等用户直接感知的数据,也包括 ObjectStore、DataStore 等 Starwhale 后台实现的存储。具体说明如下:

    +-- ${storage.root}
    | +-- .objectstore --> 存储数据集chunk文件的简单存储,使用blake2b hash算法
    | | +-- blake2b --> hash算法名称
    | | | +-- 00 --> hash2位前缀
    | | | | +-- 0019ad58... --> object文件,文件名是文件内容的hash值
    | | | +-- 05
    | +-- .datastore --> 基于pyarrow的列式存储
    | | +-- project
    | | | +-- self --> 按照project名称进行分类存储
    | | | | +-- dataset --> 数据集相关的datastore存储,一般用来存储数据集的索引信息
    | | | | +-- eval --> 模型评测结果存储
    | +-- .recover --> 软删除某个project的存储目录,可以用 `swcli project recover` 进行恢复
    | +-- .tmp --> Dataset/Model/Runtime 构建过程中临时目录
    | +-- myproject --> 用户创建的project,所有myproject信息都存储在该目录
    | +-- self --> Standalone Instance自动创建的project
    | | +-- dataset --> swds数据集存储目录
    | | +-- evaluation --> 模型评测配置文件、日志等存储目录
    | | +-- model --> swmp模型包存储目录
    | | +-- runtime --> swrt环境包存储目录
    | | +-- workdir --> 解压、复原包文件的目录
    | | | +-- model --> swmp解压后的目录
    | | | +-- runtime --> swrt解压后的目录,若进行runtime restore操作,生成的venv或conda隔离环境,也会存放在该目录中

    有时候您可能需要用到 starwhale.Link 来存储一些信息。理论上,Link里面的URI可以是任意的合法 URI(星鲸目前只支持S3协议族和HTTP),比如s3://10.131.0.1:9000/users/path。然而,有些 Link是需要鉴权才能访问的。 link_auths 就是用来存放这些鉴权信息的。

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    link_auths 里面的每一条都会自动匹配您的URI。 目前 S3 类型的鉴权信息通过 bucketendpoint 来匹配 URI。

    - + \ No newline at end of file diff --git a/zh/0.6.0/swcli/index.html b/zh/0.6.0/swcli/index.html index 61dffd79a..89c321715 100644 --- a/zh/0.6.0/swcli/index.html +++ b/zh/0.6.0/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale Client (swcli) 用户指南

    swcli 是一个命令行工具,可让您与 Starwhale 实例进行交互。您可以使用 swcli 完成 Starwhale 中几乎所有的任务。swcli 是用纯 Python3 编写的(需要 Python 3.7 ~ 3.11),因此可以通过 pip 命令轻松安装。目前,swcli 仅支持 Linux 和 macOS,Windows版本即将推出。

    - + \ No newline at end of file diff --git a/zh/0.6.0/swcli/installation/index.html b/zh/0.6.0/swcli/installation/index.html index 17a07ce0b..ee6302ff8 100644 --- a/zh/0.6.0/swcli/installation/index.html +++ b/zh/0.6.0/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    安装指南

    swcli 命令行工具能够对各种实例完成几乎所有的操作,由于是由纯 Python3 编写,可以使用 pip 命令完成安装,本文会提供一些安装建议,帮助您获得一个干净的、无依赖冲突的 swcli Python 环境。

    安装建议

    非常不建议将 Starwhale 安装在系统的全局 Python 环境中,可能会导致 Python 的依赖冲突问题。使用 venv 或 conda 创建一个隔离的 Python 环境,并在其中安装 Starwhale,是 Python 推荐的做法。

    先决条件

    • Python3.7 ~ 3.11
    • Linux 或 macOS
    • Conda(可选)

    在Ubuntu系统中,可以运行以下命令:

    sudo apt-get install python3 python3-venv python3-pip

    #如果您想安装多个python版本
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli 可以在 macOS 下工作,包括 arm(M1 Chip) 和 x86(Intel Chip) 两种体系结构。但 macOS 下自带的 Python3 可能会遇到一些 Python 自身的问题,推荐使用 homebrew 进行安装:

    brew install python3

    安装 swcli

    使用venv安装

    venv 环境既可以使用 Python3 自带的 venv,也可以使用 virtualenv 工具。

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    使用conda安装

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo rm -rf /usr/local/bin/swcli
    sudo ln -s `which swcli` /usr/local/bin/

    👏 现在,您可以在全局环境中使用 swcli 了。

    swcli 的特定场景依赖安装

    # 针对Audio处理, 主要包含soundfile库等
    python -m pip install starwhale[audio]

    # 针对Image处理,主要包含pillow库等
    python -m pip install starwhale[pillow]

    # 针对swcli model server命令
    python -m pip install starwhale[server]

    # 针对内建的Online Serving
    python -m pip install starwhale[online-serve]

    # 安装全部依赖
    python -m pip install starwhale[all]

    更新 swcli

    #适用于venv环境
    python3 -m pip install --upgrade starwhale

    #适用于conda环境
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    卸载swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/zh/0.6.0/swcli/swignore/index.html b/zh/0.6.0/swcli/swignore/index.html index 3744c0677..2deae5512 100644 --- a/zh/0.6.0/swcli/swignore/index.html +++ b/zh/0.6.0/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    关于 .swignore 文件

    .swignore 文件与 .gitignore, .dockerignore 等文件类似,都是用来定义忽略某些文件或文件夹。.swignore 文件主要应用在 Starwhale 的模型构建过程中。默认情况下,swcli model build 命令 或 starwhale.model.build() Python SDK会遍历指定目录下的所有文件,并自动排除一些已知的、不适合放入模型包中的文件或目录。

    文件格式

    • swignore文件中的每一行指定一个匹配文件和目录的模式。
    • 空行不匹配任何文件,因此它可以作为可读性的分隔符。
    • 星号*匹配除斜杠以外的任何内容。
    • #开头的行作为注释。
    • 支持wildcard的表达,类似 *.jpg, *.png

    默认下自动排除的文件或目录

    如果不想排除这些文件,可以构建模型 (swcli model build 命令) 的时候增加 --add-all 参数。

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv安装目录
    • conda安装目录

    例子

    这是MNIST示例中使用的.swignore文件:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/zh/0.6.0/swcli/uri/index.html b/zh/0.6.0/swcli/uri/index.html index 04c5fae72..32ed2793a 100644 --- a/zh/0.6.0/swcli/uri/index.html +++ b/zh/0.6.0/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.0

    Starwhale 资源URI

    提示

    资源 URI 在 Starwhale Client 中被广泛使用。URI 可以引用本地实例中的资源或远程实例中的任何其他资源。 这样 Starwhale Client 就可以轻松操作任何资源。

    concepts-org.jpg

    实例URI

    实例 URI 可以是以下形式之一:

    • local: 指本地的 Standalone 实例.
    • [http(s)://]<hostname or ip>[:<port>]:指向一个 Starwhale Cloud 实例。
    • [cloud://]<cloud alias>:Server或Cloud的实例别名,可以在实例登录阶段配置。
    警告

    “local”不同于“localhost”,前者为 Standalone 实例,而后者是一个 URL ,指向本地运行的 Starwhale Server 实例。

    例子:

    # 登录Starwhale Cloud,别名为swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.cn --alias swcloud

    # 将模型从本地实例复制到云实例
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # 将运行时复制到Starwhale Server实例:http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    项目URI

    项目URI的格式为“[<实例URI>/project/]<project name>”。 如果未指定实例 URI,则使用当前实例。

    例子:

    swcli project select self   # 选择当前实例中的self项目
    swcli project info local/project/self # 查看本地实例中的self项目信息

    模型/数据集/运行时URI

    • 模型URI: [<项目URI>/model/]<model name>[/version/<version id|tag>].
    • 数据集URI: [<项目URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • 运行时URI: [<项目URI>/runtime/]<runtime name>[/version/<version id|tag>].
    提示
    • swcli 支持更加人性化的短版本ID。您可以只键入版本ID的前几个字符,前提是它至少有四个字符长且唯一指向某个版本ID。但是,recover 命令必须使用完整的版本ID。
    • 如果未指定项目URI,将使用默认项目
    • 您始终可以使用版本标签而不是版本ID。

    例子:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # 检查模型信息,模型名称:mnist,版本:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # 使用短版本ID
    swcli model info mnist # 检查mnist模型信息
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # 使用latest的默认tag

    作业URI

    • 格式: [<项目URI>/job/]<job id>.
    • 如果未指定项目URI,将使用默认项目。

    例子:

    swcli job info mezdayjzge3w   # 查看默认实例和默认项目中的mezdayjzge3w版本
    swcli job info local/project/self/job/mezday # 检查本地实例,self项目,作业id:mezday

    默认实例

    当项目URI中的实例部分被省略时,将使用默认实例进行替代。默认实例是由 swcli instance loginswcli instance use 指定的。

    默认项目

    当模型/数据集/运行时/评估URI的项目部分被省略时,将使用默认项目。默认项目是指通过 swcli project use 命令选择的项目。

    - + \ No newline at end of file diff --git a/zh/404.html b/zh/404.html index c49073edb..8386a125b 100644 --- a/zh/404.html +++ b/zh/404.html @@ -10,13 +10,13 @@ - +

    找不到页面

    我们找不到您要找的页面。

    请联系原始链接来源网站的所有者,并告知他们链接已损坏。

    - + \ No newline at end of file diff --git a/zh/assets/js/20d3256d.312abaae.js b/zh/assets/js/20d3256d.a7d310f4.js similarity index 78% rename from zh/assets/js/20d3256d.312abaae.js rename to zh/assets/js/20d3256d.a7d310f4.js index 025b94270..dd1b17679 100644 --- a/zh/assets/js/20d3256d.312abaae.js +++ b/zh/assets/js/20d3256d.a7d310f4.js @@ -1 +1 @@ -"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[962],{3905:(e,t,r)=>{r.d(t,{Zo:()=>s,kt:()=>m});var a=r(7294);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var i=a.createContext({}),u=function(e){var t=a.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},s=function(e){var t=u(e.components);return a.createElement(i.Provider,{value:t},e.children)},p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),d=u(r),m=n,h=d["".concat(i,".").concat(m)]||d[m]||p[m]||l;return r?a.createElement(h,o(o({ref:t},s),{},{components:r})):a.createElement(h,o({ref:t},s))}));function m(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c.mdxType="string"==typeof e?e:n,o[1]=c;for(var 
u=2;u{r.r(t),r.d(t,{assets:()=>i,contentTitle:()=>o,default:()=>p,frontMatter:()=>l,metadata:()=>c,toc:()=>u});var a=r(3117),n=(r(7294),r(3905));const l={title:"Starwhale Cloud\u5165\u95e8\u6307\u5357"},o=void 0,c={unversionedId:"getting-started/cloud",id:"getting-started/cloud",title:"Starwhale Cloud\u5165\u95e8\u6307\u5357",description:"Starwhale Cloud\u8fd0\u884c\u5728\u963f\u91cc\u4e91\u4e0a\uff0c\u57df\u540d\u662f \uff0c\u540e\u7eed\u6211\u4eec\u4f1a\u63a8\u51fa\u90e8\u7f72\u5728AWS\u4e0a\u7684 \u670d\u52a1\uff0c\u9700\u8981\u6ce8\u610f\u7684\u662f\uff0c\u8fd9\u662f\u4e24\u4e2a\u76f8\u4e92\u72ec\u7acb\u7684\u5b9e\u4f8b\uff0c\u5e10\u6237\u548c\u6570\u636e\u4e0d\u5171\u4eab\u3002\u60a8\u53ef\u4ee5\u9009\u62e9\u4efb\u4f55\u4e00\u4e2a\u5f00\u59cb\u3002",source:"@site/i18n/zh/docusaurus-plugin-content-docs/current/getting-started/cloud.md",sourceDirName:"getting-started",slug:"/getting-started/cloud",permalink:"/zh/next/getting-started/cloud",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/cloud.md",tags:[],version:"current",frontMatter:{title:"Starwhale Cloud\u5165\u95e8\u6307\u5357"},sidebar:"mainSidebar",previous:{title:"Starwhale Server\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/server"},next:{title:"Starwhale Runtime\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/runtime"}},i={},u=[{value:"\u6ce8\u518cStarwhale Cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",id:"\u6ce8\u518cstarwhale-cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",level:2},{value:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6",id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6",level:2},{value:"\u767b\u5f55\u4e91\u5b9e\u4f8b",id:"\u767b\u5f55\u4e91\u5b9e\u4f8b",level:2},{value:"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale 
Cloud",id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-cloud",level:2},{value:"\u4f7f\u7528 Web UI \u8fd0\u884c\u8bc4\u4f30",id:"\u4f7f\u7528-web-ui-\u8fd0\u884c\u8bc4\u4f30",level:2}],s={toc:u};function p(e){let{components:t,...r}=e;return(0,n.kt)("wrapper",(0,a.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,n.kt)("p",null,"Starwhale Cloud\u8fd0\u884c\u5728\u963f\u91cc\u4e91\u4e0a\uff0c\u57df\u540d\u662f ",(0,n.kt)("a",{parentName:"p",href:"https://cloud.starwhale.cn"},"https://cloud.starwhale.cn")," \uff0c\u540e\u7eed\u6211\u4eec\u4f1a\u63a8\u51fa\u90e8\u7f72\u5728AWS\u4e0a\u7684 ",(0,n.kt)("a",{parentName:"p",href:"https://cloud.starwhale.ai"},"https://cloud.starwhale.ai")," \u670d\u52a1\uff0c\u9700\u8981\u6ce8\u610f\u7684\u662f\uff0c\u8fd9\u662f\u4e24\u4e2a\u76f8\u4e92\u72ec\u7acb\u7684\u5b9e\u4f8b\uff0c\u5e10\u6237\u548c\u6570\u636e\u4e0d\u5171\u4eab\u3002\u60a8\u53ef\u4ee5\u9009\u62e9\u4efb\u4f55\u4e00\u4e2a\u5f00\u59cb\u3002"),(0,n.kt)("p",null,"\u5728\u5f00\u59cb\u4e4b\u524d\uff0c\u60a8\u9700\u8981\u5148\u5b89\u88c5",(0,n.kt)("a",{parentName:"p",href:"../swcli"},"Starwhale Client\uff08swcli\uff09"),"\u3002"),(0,n.kt)("h2",{id:"\u6ce8\u518cstarwhale-cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"},"\u6ce8\u518cStarwhale Cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"),(0,n.kt)("p",null,"\u60a8\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528\u81ea\u5df1\u7684GitHub\u6216\u5fae\u4fe1\u5e10\u53f7\u767b\u5f55\uff0c\u4e5f\u53ef\u4ee5\u6ce8\u518c\u4e00\u4e2a\u65b0\u7684\u5e10\u53f7\u3002\u5982\u679c\u60a8\u4f7f\u7528 GitHub \u6216 \u5fae\u4fe1\u5e10\u53f7\u767b\u5f55\uff0c\u7cfb\u7edf\u4f1a\u8981\u6c42\u60a8\u63d0\u4f9b\u7528\u6237\u540d\u3002"),(0,n.kt)("p",null,"\u7136\u540e\u60a8\u53ef\u4ee5\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee\u3002\u5728\u672c\u6559\u7a0b\u4e2d\uff0c\u6211\u4eec\u5c06\u4f7f\u7528\u540d\u79f0 ",(0,n.kt)("inlineCode",{parentName:"p"},"demo")," 
\u4f5c\u4e3a\u9879\u76ee\u540d\u79f0\u3002"),(0,n.kt)("h2",{id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6"},"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6"),(0,n.kt)("p",null,"\u6309\u7167",(0,n.kt)("a",{parentName:"p",href:"standalone"},"Starwhale Standalone\u5165\u95e8\u6307\u5357"),"\u4e2d\u7684\u6b65\u9aa41\u5230\u6b65\u9aa44\u5728\u672c\u5730\u673a\u5668\u4e0a\u521b\u5efa\uff1a"),(0,n.kt)("ul",null,(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3amnist\u7684Starwhale\u6a21\u578b"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3amnist\u7684Starwhale\u6570\u636e\u96c6"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3apytorch\u7684Starwhale\u8fd0\u884c\u65f6")),(0,n.kt)("h2",{id:"\u767b\u5f55\u4e91\u5b9e\u4f8b"},"\u767b\u5f55\u4e91\u5b9e\u4f8b"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username <\u60a8\u7684\u7528\u6237\u540d> --password <\u60a8\u7684\u5bc6\u7801> --alias swcloud https://cloud.starwhale.cn\n")),(0,n.kt)("h2",{id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-cloud"},"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale Cloud"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model copy mnist swcloud/project/demo\nswcli dataset copy mnist swcloud/project/demo\nswcli runtime copy pytorch swcloud/project/demo\n")),(0,n.kt)("h2",{id:"\u4f7f\u7528-web-ui-\u8fd0\u884c\u8bc4\u4f30"},"\u4f7f\u7528 Web UI \u8fd0\u884c\u8bc4\u4f30"),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,n.kt)("p",null,(0,n.kt)("strong",{parentName:"p"},"\u606d\u559c\uff01 \u60a8\u5df2\u5b8c\u6210Starwhale 
Cloud\u7684\u5165\u95e8\u6307\u5357\u3002")))}p.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[962],{3905:(e,t,r)=>{r.d(t,{Zo:()=>s,kt:()=>h});var a=r(7294);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var i=a.createContext({}),u=function(e){var t=a.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},s=function(e){var t=u(e.components);return a.createElement(i.Provider,{value:t},e.children)},p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),d=u(r),h=n,m=d["".concat(i,".").concat(h)]||d[h]||p[h]||l;return r?a.createElement(m,o(o({ref:t},s),{},{components:r})):a.createElement(m,o({ref:t},s))}));function h(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c.mdxType="string"==typeof e?e:n,o[1]=c;for(var u=2;u{r.r(t),r.d(t,{assets:()=>i,contentTitle:()=>o,default:()=>p,frontMatter:()=>l,metadata:()=>c,toc:()=>u});var a=r(3117),n=(r(7294),r(3905));const l={title:"Starwhale Cloud\u5165\u95e8\u6307\u5357"},o=void 0,c={unversionedId:"getting-started/cloud",id:"getting-started/cloud",title:"Starwhale 
Cloud\u5165\u95e8\u6307\u5357",description:"Starwhale Cloud\u8fd0\u884c\u5728\u963f\u91cc\u4e91\u4e0a\uff0c\u57df\u540d\u662f \uff0c\u540e\u7eed\u6211\u4eec\u4f1a\u63a8\u51fa\u90e8\u7f72\u5728AWS\u4e0a\u7684 \u670d\u52a1\uff0c\u9700\u8981\u6ce8\u610f\u7684\u662f\uff0c\u8fd9\u662f\u4e24\u4e2a\u76f8\u4e92\u72ec\u7acb\u7684\u5b9e\u4f8b\uff0c\u5e10\u6237\u548c\u6570\u636e\u4e0d\u5171\u4eab\u3002\u60a8\u53ef\u4ee5\u9009\u62e9\u4efb\u4f55\u4e00\u4e2a\u5f00\u59cb\u3002",source:"@site/i18n/zh/docusaurus-plugin-content-docs/current/getting-started/cloud.md",sourceDirName:"getting-started",slug:"/getting-started/cloud",permalink:"/zh/next/getting-started/cloud",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/cloud.md",tags:[],version:"current",frontMatter:{title:"Starwhale Cloud\u5165\u95e8\u6307\u5357"},sidebar:"mainSidebar",previous:{title:"Starwhale Server\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/server"},next:{title:"Starwhale Runtime\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/runtime"}},i={},u=[{value:"\u6ce8\u518cStarwhale Cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",id:"\u6ce8\u518cstarwhale-cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",level:2},{value:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6",id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6",level:2},{value:"\u767b\u5f55\u4e91\u5b9e\u4f8b",id:"\u767b\u5f55\u4e91\u5b9e\u4f8b",level:2},{value:"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale Cloud",id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-cloud",level:2},{value:"\u4f7f\u7528 Web UI \u8fd0\u884c\u8bc4\u4f30",id:"\u4f7f\u7528-web-ui-\u8fd0\u884c\u8bc4\u4f30",level:2}],s={toc:u};function 
p(e){let{components:t,...r}=e;return(0,n.kt)("wrapper",(0,a.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,n.kt)("p",null,"Starwhale Cloud\u8fd0\u884c\u5728\u963f\u91cc\u4e91\u4e0a\uff0c\u57df\u540d\u662f ",(0,n.kt)("a",{parentName:"p",href:"https://cloud.starwhale.cn"},"https://cloud.starwhale.cn")," \uff0c\u540e\u7eed\u6211\u4eec\u4f1a\u63a8\u51fa\u90e8\u7f72\u5728AWS\u4e0a\u7684 ",(0,n.kt)("a",{parentName:"p",href:"https://cloud.starwhale.ai"},"https://cloud.starwhale.ai")," \u670d\u52a1\uff0c\u9700\u8981\u6ce8\u610f\u7684\u662f\uff0c\u8fd9\u662f\u4e24\u4e2a\u76f8\u4e92\u72ec\u7acb\u7684\u5b9e\u4f8b\uff0c\u5e10\u6237\u548c\u6570\u636e\u4e0d\u5171\u4eab\u3002\u60a8\u53ef\u4ee5\u9009\u62e9\u4efb\u4f55\u4e00\u4e2a\u5f00\u59cb\u3002"),(0,n.kt)("p",null,"\u5728\u5f00\u59cb\u4e4b\u524d\uff0c\u60a8\u9700\u8981\u5148\u5b89\u88c5",(0,n.kt)("a",{parentName:"p",href:"../swcli"},"Starwhale Client\uff08swcli\uff09"),"\u3002"),(0,n.kt)("h2",{id:"\u6ce8\u518cstarwhale-cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"},"\u6ce8\u518cStarwhale Cloud\u5e76\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"),(0,n.kt)("p",null,"\u60a8\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528\u81ea\u5df1\u7684GitHub\u6216\u5fae\u4fe1\u5e10\u53f7\u767b\u5f55\uff0c\u4e5f\u53ef\u4ee5\u6ce8\u518c\u4e00\u4e2a\u65b0\u7684\u5e10\u53f7\u3002\u5982\u679c\u60a8\u4f7f\u7528 GitHub \u6216 \u5fae\u4fe1\u5e10\u53f7\u767b\u5f55\uff0c\u7cfb\u7edf\u4f1a\u8981\u6c42\u60a8\u63d0\u4f9b\u7528\u6237\u540d\u3002"),(0,n.kt)("p",null,"\u7136\u540e\u60a8\u53ef\u4ee5\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee\u3002\u5728\u672c\u6559\u7a0b\u4e2d\uff0c\u6211\u4eec\u5c06\u4f7f\u7528\u540d\u79f0 ",(0,n.kt)("inlineCode",{parentName:"p"},"demo")," 
\u4f5c\u4e3a\u9879\u76ee\u540d\u79f0\u3002"),(0,n.kt)("h2",{id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6"},"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6"),(0,n.kt)("p",null,"\u6309\u7167",(0,n.kt)("a",{parentName:"p",href:"standalone"},"Starwhale Standalone\u5165\u95e8\u6307\u5357"),"\u4e2d\u7684\u6b65\u9aa41\u5230\u6b65\u9aa44\u5728\u672c\u5730\u673a\u5668\u4e0a\u521b\u5efa\uff1a"),(0,n.kt)("ul",null,(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3ahelloworld\u7684Starwhale\u6a21\u578b"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3amnist64\u7684Starwhale\u6570\u636e\u96c6"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3ahelloworld\u7684Starwhale\u8fd0\u884c\u65f6")),(0,n.kt)("h2",{id:"\u767b\u5f55\u4e91\u5b9e\u4f8b"},"\u767b\u5f55\u4e91\u5b9e\u4f8b"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username <\u60a8\u7684\u7528\u6237\u540d> --password <\u60a8\u7684\u5bc6\u7801> --alias swcloud https://cloud.starwhale.cn\n")),(0,n.kt)("h2",{id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-cloud"},"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale Cloud"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model copy helloworld swcloud/project/demo\nswcli dataset copy mnist64 swcloud/project/demo\nswcli runtime copy helloworld swcloud/project/demo\n")),(0,n.kt)("h2",{id:"\u4f7f\u7528-web-ui-\u8fd0\u884c\u8bc4\u4f30"},"\u4f7f\u7528 Web UI \u8fd0\u884c\u8bc4\u4f30"),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,n.kt)("p",null,(0,n.kt)("strong",{parentName:"p"},"\u606d\u559c\uff01 \u60a8\u5df2\u5b8c\u6210Starwhale 
Cloud\u7684\u5165\u95e8\u6307\u5357\u3002")))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/zh/assets/js/94a33573.c6306e92.js b/zh/assets/js/94a33573.c6306e92.js deleted file mode 100644 index 7f246bae3..000000000 --- a/zh/assets/js/94a33573.c6306e92.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[6007],{3905:(e,t,a)=>{a.d(t,{Zo:()=>m,kt:()=>d});var n=a(7294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var o=n.createContext({}),s=function(e){var t=n.useContext(o),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},m=function(e){var t=s(e.components);return n.createElement(o.Provider,{value:t},e.children)},c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,o=e.parentName,m=p(e,["components","mdxType","originalType","parentName"]),u=s(a),d=r,h=u["".concat(o,".").concat(d)]||u[d]||c[d]||l;return a?n.createElement(h,i(i({ref:t},m),{},{components:a})):n.createElement(h,i({ref:t},m))}));function d(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=u;var p={};for(var o in t)hasOwnProperty.call(t,o)&&(p[o]=t[o]);p.originalType=e,p.mdxType="string"==typeof e?e:r,i[1]=p;for(var s=2;s{a.r(t),a.d(t,{assets:()=>o,contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>p,toc:()=>s});var 
n=a(3117),r=(a(7294),a(3905));const l={title:"Starwhale Standalone\u5165\u95e8\u6307\u5357"},i=void 0,p={unversionedId:"getting-started/standalone",id:"getting-started/standalone",title:"Starwhale Standalone\u5165\u95e8\u6307\u5357",description:"\u5f53Starwhale Client\uff08swcli\uff09\u5b89\u88c5\u5b8c\u6210\u540e\uff0c\u60a8\u5c31\u53ef\u4ee5\u4f7f\u7528Starwhale Standalone\u3002",source:"@site/i18n/zh/docusaurus-plugin-content-docs/current/getting-started/standalone.md",sourceDirName:"getting-started",slug:"/getting-started/standalone",permalink:"/zh/next/getting-started/standalone",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/standalone.md",tags:[],version:"current",frontMatter:{title:"Starwhale Standalone\u5165\u95e8\u6307\u5357"},sidebar:"mainSidebar",previous:{title:"\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/"},next:{title:"Starwhale Server\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/server"}},o={},s=[{value:"\u4e0b\u8f7d\u4f8b\u5b50",id:"\u4e0b\u8f7d\u4f8b\u5b50",level:2},{value:"\u6784\u5efa Pytorch \u8fd0\u884c\u65f6",id:"\u6784\u5efa-pytorch-\u8fd0\u884c\u65f6",level:2},{value:"\u6784\u5efa\u6a21\u578b",id:"\u6784\u5efa\u6a21\u578b",level:2},{value:"\u6784\u5efa\u6570\u636e\u96c6",id:"\u6784\u5efa\u6570\u636e\u96c6",level:2},{value:"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a",id:"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a",level:2}],m={toc:s};function c(e){let{components:t,...a}=e;return(0,r.kt)("wrapper",(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("p",null,"\u5f53",(0,r.kt)("a",{parentName:"p",href:"../swcli/"},"Starwhale Client\uff08swcli\uff09"),"\u5b89\u88c5\u5b8c\u6210\u540e\uff0c\u60a8\u5c31\u53ef\u4ee5\u4f7f\u7528Starwhale Standalone\u3002"),(0,r.kt)("p",null,"\u6211\u4eec\u4e5f\u63d0\u4f9b\u5bf9\u5e94\u7684Jupyter Notebook\u4f8b\u5b50\uff0c\u53ef\u4ee5\u5728 
",(0,r.kt)("a",{parentName:"p",href:"https://colab.research.google.com/github/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"Google Colab")," \u6216\u672c\u5730\u7684 ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"vscode/jupyterlab")," \u4e2d\u8bd5\u7528\u3002"),(0,r.kt)("h2",{id:"\u4e0b\u8f7d\u4f8b\u5b50"},"\u4e0b\u8f7d\u4f8b\u5b50"),(0,r.kt)("p",null,"\u901a\u8fc7\u4ee5\u4e0b\u65b9\u5f0f\u514b\u9686Starwhale\u9879\u76ee\u6765\u4e0b\u8f7dStarwhale\u793a\u4f8b\uff1a"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1\ncd starwhale\n")),(0,r.kt)("p",null,"\u4e3a\u4e86\u8282\u7701\u4f8b\u5b50\u7684\u4e0b\u8f7d\u65f6\u95f4\uff0c\u6211\u4eec\u6267\u884cgit clone\u547d\u4ee4\u65f6\uff0c\u5ffd\u7565\u4e86git-lfs\uff0c\u5e76\u53ea\u4fdd\u7559\u6700\u8fd1\u4e00\u6b21\u7684commit\u4fe1\u606f\u3002\u6211\u4eec\u9009\u7528ML/DL\u9886\u57df\u7684HelloWorld\u7a0b\u5e8f-MNIST\u6765\u4ecb\u7ecd\u5982\u4f55\u4ece\u96f6\u5f00\u59cb\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u5305\u548c\u8fd0\u884c\u73af\u5883\uff0c\u5e76\u6700\u7ec8\u5b8c\u6210\u6a21\u578b\u8bc4\u6d4b\u3002\u63a5\u4e0b\u6765\u7684\u64cd\u4f5c\u90fd\u5728 starwhale \u76ee\u5f55\u4e2d\u8fdb\u884c\u3002"),(0,r.kt)("p",null,(0,r.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/standalone-core-workflow.gif",alt:"\u6838\u5fc3\u5de5\u4f5c\u6d41\u7a0b"})),(0,r.kt)("h2",{id:"\u6784\u5efa-pytorch-\u8fd0\u884c\u65f6"},"\u6784\u5efa Pytorch 
\u8fd0\u884c\u65f6"),(0,r.kt)("p",null,"\u8fd0\u884c\u65f6\u793a\u4f8b\u4ee3\u7801\u4f4d\u4e8e",(0,r.kt)("inlineCode",{parentName:"p"},"example/runtime/pytorch"),"\u76ee\u5f55\u4e2d\u3002"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u6784\u5efaStarwhale\u8fd0\u884c\u65f6\u5305\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime build --yaml example/runtime/pytorch/runtime.yaml\n")),(0,r.kt)("admonition",{parentName:"li",type:"tip"},(0,r.kt)("p",{parentName:"admonition"},"\u5f53\u9996\u6b21\u6784\u5efaStarwhale Runtime\u65f6\uff0c\u7531\u4e8e\u9700\u8981\u521b\u5efavenv\u6216conda\u9694\u79bb\u73af\u5883\uff0c\u5e76\u4e0b\u8f7d\u76f8\u5173\u7684Python\u4f9d\u8d56\uff0c\u547d\u4ee4\u6267\u884c\u9700\u8981\u82b1\u8d39\u4e00\u6bb5\u65f6\u95f4\u3002\u65f6\u95f4\u957f\u77ed\u53d6\u51b3\u4e0e\u6240\u5728\u673a\u5668\u7684\u7f51\u7edc\u60c5\u51b5\u548cruntime.yaml\u4e2dPython\u4f9d\u8d56\u7684\u6570\u91cf\u3002\u5efa\u8bae\u5408\u7406\u8bbe\u7f6e\u673a\u5668\u7684 ",(0,r.kt)("inlineCode",{parentName:"p"},"~/.pip/pip.conf")," \u6587\u4ef6\uff0c\u586b\u5199\u7f13\u5b58\u8def\u5f84\u548c\u9002\u5408\u5f53\u524d\u7f51\u7edc\u73af\u5883\u7684pypi mirror\u5730\u5740\u3002"),(0,r.kt)("p",{parentName:"admonition"},"\u5904\u4e8e\u4e2d\u56fd\u5927\u9646\u7f51\u7edc\u73af\u5883\u4e2d\u7684\u7528\u6237\uff0c\u53ef\u4ee5\u53c2\u8003\u5982\u4e0b\u914d\u7f6e\uff1a"),(0,r.kt)("pre",{parentName:"admonition"},(0,r.kt)("code",{parentName:"pre",className:"language-conf"},"[global]\ncache-dir = ~/.cache/pip\nindex-url = https://pypi.tuna.tsinghua.edu.cn/simple\nextra-index-url = https://mirrors.aliyun.com/pypi/simple/\n")))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u68c0\u67e5\u60a8\u672c\u5730\u7684Starwhale\u8fd0\u884c\u65f6\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime list\nswcli runtime info 
pytorch\n")))),(0,r.kt)("h2",{id:"\u6784\u5efa\u6a21\u578b"},"\u6784\u5efa\u6a21\u578b"),(0,r.kt)("p",null,"\u6a21\u578b\u793a\u4f8b\u4ee3\u7801\u4f4d\u4e8e ",(0,r.kt)("inlineCode",{parentName:"p"},"example/mnist")," \u76ee\u5f55\u4e2d\u3002"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u4e0b\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u6587\u4ef6\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"cd example/mnist\nCN=1 make download-model\n# \u975e\u4e2d\u56fd\u5927\u9646\u7f51\u7edc\u7528\u6237\uff0c\u53ef\u4ee5\u7701\u7565 CN=1 \u73af\u5883\u53d8\u91cf\ncd -\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u6784\u5efa\u4e00\u4e2aStarwhale\u6a21\u578b\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model build example/mnist --runtime pytorch\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u68c0\u67e5\u60a8\u672c\u5730\u7684Starwhale\u6a21\u578b\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model list\nswcli model info mnist\n")))),(0,r.kt)("h2",{id:"\u6784\u5efa\u6570\u636e\u96c6"},"\u6784\u5efa\u6570\u636e\u96c6"),(0,r.kt)("p",null,"\u6570\u636e\u96c6\u793a\u4f8b\u4ee3\u7801\u4f4d\u4e8e ",(0,r.kt)("inlineCode",{parentName:"p"},"example/mnist")," \u76ee\u5f55\u4e2d\u3002"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u4e0b\u8f7dMNIST\u539f\u59cb\u6570\u636e\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"cd example/mnist\nCN=1 make download-data\n# \u975e\u4e2d\u56fd\u5927\u9646\u7f51\u7edc\u7528\u6237\uff0c\u53ef\u4ee5\u7701\u7565 CN=1 \u73af\u5883\u53d8\u91cf\ncd 
-\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u6784\u5efaStarwhale\u6570\u636e\u96c6\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli dataset build --yaml example/mnist/dataset.yaml\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u68c0\u67e5\u60a8\u672c\u5730\u7684Starwhale\u6570\u636e\u96c6\uff1a"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli dataset list\nswcli dataset info mnist\nswcli dataset head mnist\n")))),(0,r.kt)("h2",{id:"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a"},"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u521b\u5efa\u8bc4\u4f30\u5de5\u4f5c"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch\n"))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"\u68c0\u67e5\u8bc4\u4f30\u7ed3\u679c"),(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"swcli job list\nswcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)\n")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"\u606d\u559c\uff01 \u60a8\u5df2\u5b8c\u6210Starwhale Standalone\u7684\u5165\u95e8\u6307\u5357\u3002")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/zh/assets/js/94a33573.dc6c4873.js b/zh/assets/js/94a33573.dc6c4873.js new file mode 100644 index 000000000..4a49e651e --- /dev/null +++ b/zh/assets/js/94a33573.dc6c4873.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[6007],{3905:(e,t,a)=>{a.d(t,{Zo:()=>c,kt:()=>d});var n=a(7294);function l(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var 
a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(l[a]=e[a]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(l[a]=e[a])}return l}var p=n.createContext({}),s=function(e){var t=n.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},c=function(e){var t=s(e.components);return n.createElement(p.Provider,{value:t},e.children)},m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,l=e.mdxType,r=e.originalType,p=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),u=s(a),d=l,h=u["".concat(p,".").concat(d)]||u[d]||m[d]||r;return a?n.createElement(h,i(i({ref:t},c),{},{components:a})):n.createElement(h,i({ref:t},c))}));function d(e,t){var a=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=a.length,i=new Array(r);i[0]=u;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o.mdxType="string"==typeof e?e:l,i[1]=o;for(var s=2;s{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var n=a(3117),l=(a(7294),a(3905));const r={title:"Starwhale Standalone\u5165\u95e8\u6307\u5357"},i=void 0,o={unversionedId:"getting-started/standalone",id:"getting-started/standalone",title:"Starwhale Standalone\u5165\u95e8\u6307\u5357",description:"\u5f53Starwhale Client\uff08swcli\uff09\u5b89\u88c5\u5b8c\u6210\u540e\uff0c\u60a8\u5c31\u53ef\u4ee5\u4f7f\u7528Starwhale 
Standalone\u3002",source:"@site/i18n/zh/docusaurus-plugin-content-docs/current/getting-started/standalone.md",sourceDirName:"getting-started",slug:"/getting-started/standalone",permalink:"/zh/next/getting-started/standalone",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/standalone.md",tags:[],version:"current",frontMatter:{title:"Starwhale Standalone\u5165\u95e8\u6307\u5357"},sidebar:"mainSidebar",previous:{title:"\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/"},next:{title:"Starwhale Server\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/server"}},p={},s=[{value:"\u4e0b\u8f7d\u4f8b\u5b50",id:"\u4e0b\u8f7d\u4f8b\u5b50",level:2},{value:"\u6784\u5efa Starwhale \u8fd0\u884c\u65f6",id:"\u6784\u5efa-starwhale-\u8fd0\u884c\u65f6",level:2},{value:"\u6784\u5efa\u6a21\u578b",id:"\u6784\u5efa\u6a21\u578b",level:2},{value:"\u6784\u5efa\u6570\u636e\u96c6",id:"\u6784\u5efa\u6570\u636e\u96c6",level:2},{value:"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a",id:"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a",level:2}],c={toc:s};function m(e){let{components:t,...a}=e;return(0,l.kt)("wrapper",(0,n.Z)({},c,a,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("p",null,"\u5f53",(0,l.kt)("a",{parentName:"p",href:"../swcli/"},"Starwhale Client\uff08swcli\uff09"),"\u5b89\u88c5\u5b8c\u6210\u540e\uff0c\u60a8\u5c31\u53ef\u4ee5\u4f7f\u7528Starwhale Standalone\u3002"),(0,l.kt)("p",null,"\u6211\u4eec\u4e5f\u63d0\u4f9b\u5bf9\u5e94\u7684Jupyter Notebook\u4f8b\u5b50\uff0c\u53ef\u4ee5\u5728 ",(0,l.kt)("a",{parentName:"p",href:"https://colab.research.google.com/github/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"Google Colab")," \u6216\u672c\u5730\u7684 ",(0,l.kt)("a",{parentName:"p",href:"https://github.com/star-whale/starwhale/blob/main/example/notebooks/quickstart-standalone.ipynb"},"vscode/jupyterlab")," 
\u4e2d\u8bd5\u7528\u3002"),(0,l.kt)("h2",{id:"\u4e0b\u8f7d\u4f8b\u5b50"},"\u4e0b\u8f7d\u4f8b\u5b50"),(0,l.kt)("p",null,"\u901a\u8fc7\u4ee5\u4e0b\u65b9\u5f0f\u514b\u9686Starwhale\u9879\u76ee\u6765\u4e0b\u8f7dStarwhale\u793a\u4f8b\uff1a"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1\ncd starwhale\n")),(0,l.kt)("p",null,"\u4e3a\u4e86\u8282\u7701\u4f8b\u5b50\u7684\u4e0b\u8f7d\u65f6\u95f4\uff0c\u6211\u4eec\u6267\u884cgit clone\u547d\u4ee4\u65f6\uff0c\u5ffd\u7565\u4e86git-lfs\uff0c\u5e76\u53ea\u4fdd\u7559\u6700\u8fd1\u4e00\u6b21\u7684commit\u4fe1\u606f\u3002\u6211\u4eec\u9009\u7528ML/DL\u9886\u57df\u7684HelloWorld\u7a0b\u5e8f-MNIST\u6765\u4ecb\u7ecd\u5982\u4f55\u4ece\u96f6\u5f00\u59cb\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u5305\u548c\u8fd0\u884c\u73af\u5883\uff0c\u5e76\u6700\u7ec8\u5b8c\u6210\u6a21\u578b\u8bc4\u6d4b\u3002\u63a5\u4e0b\u6765\u7684\u64cd\u4f5c\u90fd\u5728 starwhale \u76ee\u5f55\u4e2d\u8fdb\u884c\u3002"),(0,l.kt)("p",null,(0,l.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/standalone-core-workflow.gif",alt:"\u6838\u5fc3\u5de5\u4f5c\u6d41\u7a0b"})),(0,l.kt)("h2",{id:"\u6784\u5efa-starwhale-\u8fd0\u884c\u65f6"},"\u6784\u5efa Starwhale \u8fd0\u884c\u65f6"),(0,l.kt)("p",null,"\u8fd0\u884c\u65f6\u793a\u4f8b\u4ee3\u7801\u4f4d\u4e8e",(0,l.kt)("inlineCode",{parentName:"p"},"example/helloworld"),"\u76ee\u5f55\u4e2d\u3002"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u6784\u5efaStarwhale\u8fd0\u884c\u65f6\u5305\uff1a"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv runtime build --yaml example/helloworld/runtime.yaml\n")),(0,l.kt)("admonition",{parentName:"li",type:"tip"},(0,l.kt)("p",{parentName:"admonition"},"\u5f53\u9996\u6b21\u6784\u5efaStarwhale 
Runtime\u65f6\uff0c\u7531\u4e8e\u9700\u8981\u521b\u5efavenv\u6216conda\u9694\u79bb\u73af\u5883\uff0c\u5e76\u4e0b\u8f7d\u76f8\u5173\u7684Python\u4f9d\u8d56\uff0c\u547d\u4ee4\u6267\u884c\u9700\u8981\u82b1\u8d39\u4e00\u6bb5\u65f6\u95f4\u3002\u65f6\u95f4\u957f\u77ed\u53d6\u51b3\u4e0e\u6240\u5728\u673a\u5668\u7684\u7f51\u7edc\u60c5\u51b5\u548cruntime.yaml\u4e2dPython\u4f9d\u8d56\u7684\u6570\u91cf\u3002\u5efa\u8bae\u5408\u7406\u8bbe\u7f6e\u673a\u5668\u7684 ",(0,l.kt)("inlineCode",{parentName:"p"},"~/.pip/pip.conf")," \u6587\u4ef6\uff0c\u586b\u5199\u7f13\u5b58\u8def\u5f84\u548c\u9002\u5408\u5f53\u524d\u7f51\u7edc\u73af\u5883\u7684pypi mirror\u5730\u5740\u3002"),(0,l.kt)("p",{parentName:"admonition"},"\u5904\u4e8e\u4e2d\u56fd\u5927\u9646\u7f51\u7edc\u73af\u5883\u4e2d\u7684\u7528\u6237\uff0c\u53ef\u4ee5\u53c2\u8003\u5982\u4e0b\u914d\u7f6e\uff1a"),(0,l.kt)("pre",{parentName:"admonition"},(0,l.kt)("code",{parentName:"pre",className:"language-conf"},"[global]\ncache-dir = ~/.cache/pip\nindex-url = https://pypi.tuna.tsinghua.edu.cn/simple\nextra-index-url = https://mirrors.aliyun.com/pypi/simple/\n")))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u68c0\u67e5\u60a8\u672c\u5730\u7684Starwhale\u8fd0\u884c\u65f6\uff1a"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime list\nswcli runtime info helloworld\n")))),(0,l.kt)("h2",{id:"\u6784\u5efa\u6a21\u578b"},"\u6784\u5efa\u6a21\u578b"),(0,l.kt)("p",null,"\u6a21\u578b\u793a\u4f8b\u4ee3\u7801\u4f4d\u4e8e ",(0,l.kt)("inlineCode",{parentName:"p"},"example/helloworld")," \u76ee\u5f55\u4e2d\u3002"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u6784\u5efa\u4e00\u4e2aStarwhale\u6a21\u578b\uff1a"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv model build example/helloworld --name helloworld -m evaluation --runtime 
helloworld\n"))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u68c0\u67e5\u60a8\u672c\u5730\u7684Starwhale\u6a21\u578b\uff1a"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli model list\nswcli model info helloworld\n")))),(0,l.kt)("h2",{id:"\u6784\u5efa\u6570\u636e\u96c6"},"\u6784\u5efa\u6570\u636e\u96c6"),(0,l.kt)("p",null,"\u6570\u636e\u96c6\u793a\u4f8b\u4ee3\u7801\u4f4d\u4e8e ",(0,l.kt)("inlineCode",{parentName:"p"},"example/helloworld")," \u76ee\u5f55\u4e2d\u3002"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u6784\u5efaStarwhale\u6570\u636e\u96c6\uff1a"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli runtime activate helloworld\npython3 example/helloworld/dataset.py\ndeactivate\n"))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u68c0\u67e5\u60a8\u672c\u5730\u7684Starwhale\u6570\u636e\u96c6\uff1a"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli dataset list\nswcli dataset info mnist64\nswcli dataset head mnist64\n")))),(0,l.kt)("h2",{id:"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a"},"\u8fd0\u884c\u8bc4\u4f30\u4f5c\u4e1a"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u521b\u5efa\u8bc4\u4f30\u5de5\u4f5c"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli -vvv model run --uri helloworld --dataset mnist64 --runtime helloworld\n"))),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("p",{parentName:"li"},"\u68c0\u67e5\u8bc4\u4f30\u7ed3\u679c"),(0,l.kt)("pre",{parentName:"li"},(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"swcli job list\nswcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)\n")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"\u606d\u559c\uff01 \u60a8\u5df2\u5b8c\u6210Starwhale 
Standalone\u7684\u5165\u95e8\u6307\u5357\u3002")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/zh/assets/js/d8b4b029.5d9c1fa7.js b/zh/assets/js/d8b4b029.3b540579.js similarity index 79% rename from zh/assets/js/d8b4b029.5d9c1fa7.js rename to zh/assets/js/d8b4b029.3b540579.js index b1fb19fcc..d37683538 100644 --- a/zh/assets/js/d8b4b029.5d9c1fa7.js +++ b/zh/assets/js/d8b4b029.3b540579.js @@ -1 +1 @@ -"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[5018],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>m});var a=r(7294);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),c=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,s=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),d=c(r),m=n,v=d["".concat(s,".").concat(m)]||d[m]||u[m]||l;return r?a.createElement(v,o(o({ref:t},p),{},{components:r})):a.createElement(v,o({ref:t},p))}));function m(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=d;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i.mdxType="string"==typeof e?e:n,o[1]=i;for(var 
c=2;c{r.r(t),r.d(t,{assets:()=>s,contentTitle:()=>o,default:()=>u,frontMatter:()=>l,metadata:()=>i,toc:()=>c});var a=r(3117),n=(r(7294),r(3905));const l={title:"Starwhale Server\u5165\u95e8\u6307\u5357"},o=void 0,i={unversionedId:"getting-started/server",id:"getting-started/server",title:"Starwhale Server\u5165\u95e8\u6307\u5357",description:"\u5b89\u88c5Starwhale Server",source:"@site/i18n/zh/docusaurus-plugin-content-docs/current/getting-started/server.md",sourceDirName:"getting-started",slug:"/getting-started/server",permalink:"/zh/next/getting-started/server",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/server.md",tags:[],version:"current",frontMatter:{title:"Starwhale Server\u5165\u95e8\u6307\u5357"},sidebar:"mainSidebar",previous:{title:"Starwhale Standalone\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/standalone"},next:{title:"Starwhale Cloud\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/cloud"}},s={},c=[{value:"\u5b89\u88c5Starwhale Server",id:"\u5b89\u88c5starwhale-server",level:2},{value:"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",id:"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",level:2},{value:"\u767b\u5f55\u670d\u52a1\u5668",id:"\u767b\u5f55\u670d\u52a1\u5668",level:3},{value:"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee",id:"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee",level:3},{value:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6",id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6",level:2},{value:"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale Server",id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-server",level:2},{value:"\u4f7f\u7528Web 
UI\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30",id:"\u4f7f\u7528web-ui\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30",level:2}],p={toc:c};function u(e){let{components:t,...r}=e;return(0,n.kt)("wrapper",(0,a.Z)({},p,r,{components:t,mdxType:"MDXLayout"}),(0,n.kt)("h2",{id:"\u5b89\u88c5starwhale-server"},"\u5b89\u88c5Starwhale Server"),(0,n.kt)("p",null,"\u5b89\u88c5 Starwhale Server\uff0c\u53c2\u89c1",(0,n.kt)("a",{parentName:"p",href:"/zh/next/server/installation/"},"\u5b89\u88c5\u6307\u5357"),"\u3002"),(0,n.kt)("h2",{id:"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"},"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"),(0,n.kt)("h3",{id:"\u767b\u5f55\u670d\u52a1\u5668"},"\u767b\u5f55\u670d\u52a1\u5668"),(0,n.kt)("p",null,"\u6253\u5f00\u6d4f\u89c8\u5668\u5e76\u5728\u5730\u5740\u680f\u4e2d\u8f93\u5165\u670d\u52a1\u5668\u7684 URL\u3002 \u4f7f\u7528\u9ed8\u8ba4\u7528\u6237\u540d\uff08starwhale\uff09\u548c\u5bc6\u7801\uff08abcd1234\uff09\u767b\u5f55\u3002"),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-artifacts.gif",alt:"console-artifacts.gif"})),(0,n.kt)("h3",{id:"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee"},"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee"),(0,n.kt)("h2",{id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6"},"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6"),(0,n.kt)("p",null,"\u6309\u7167",(0,n.kt)("a",{parentName:"p",href:"standalone"},"Starwhale 
Standalone\u5165\u95e8\u6307\u5357"),"\u4e2d\u7684\u6b65\u9aa41\u5230\u6b65\u9aa44\u5728\u672c\u5730\u673a\u5668\u4e0a\u521b\u5efa\uff1a"),(0,n.kt)("ul",null,(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3amnist\u7684Starwhale\u6a21\u578b"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3amnist\u7684Starwhale\u6570\u636e\u96c6"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3apytorch\u7684Starwhale\u8fd0\u884c\u65f6")),(0,n.kt)("h2",{id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-server"},"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale Server"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username --password --alias server \n\nswcli model copy mnist server/project/demo\nswcli dataset copy mnistserver/project/demo\nswcli runtime copy pytorch server/project/demo\n")),(0,n.kt)("h2",{id:"\u4f7f\u7528web-ui\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30"},"\u4f7f\u7528Web UI\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30"),(0,n.kt)("p",null,"\u4f7f\u7528\u6d4f\u89c8\u5668\u6253\u5f00\u201cdemo\u201d\u9879\u76ee\u5e76\u521b\u5efa\u4e00\u4e2a\u65b0\u7684\u8bc4\u4f30\u3002"),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,n.kt)("p",null,(0,n.kt)("strong",{parentName:"p"},"\u606d\u559c\uff01 \u60a8\u5df2\u5b8c\u6210Starwhale Server\u7684\u5165\u95e8\u6307\u5357\u3002")))}u.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunkstarwhale_docs=self.webpackChunkstarwhale_docs||[]).push([[5018],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>m});var a=r(7294);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),c=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,s=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),d=c(r),m=n,v=d["".concat(s,".").concat(m)]||d[m]||u[m]||l;return r?a.createElement(v,o(o({ref:t},p),{},{components:r})):a.createElement(v,o({ref:t},p))}));function m(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=d;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i.mdxType="string"==typeof e?e:n,o[1]=i;for(var c=2;c{r.r(t),r.d(t,{assets:()=>s,contentTitle:()=>o,default:()=>u,frontMatter:()=>l,metadata:()=>i,toc:()=>c});var a=r(3117),n=(r(7294),r(3905));const l={title:"Starwhale Server\u5165\u95e8\u6307\u5357"},o=void 0,i={unversionedId:"getting-started/server",id:"getting-started/server",title:"Starwhale Server\u5165\u95e8\u6307\u5357",description:"\u5b89\u88c5Starwhale Server",source:"@site/i18n/zh/docusaurus-plugin-content-docs/current/getting-started/server.md",sourceDirName:"getting-started",slug:"/getting-started/server",permalink:"/zh/next/getting-started/server",draft:!1,editUrl:"https://github.com/star-whale/docs/tree/main/docs/getting-started/server.md",tags:[],version:"current",frontMatter:{title:"Starwhale 
Server\u5165\u95e8\u6307\u5357"},sidebar:"mainSidebar",previous:{title:"Starwhale Standalone\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/standalone"},next:{title:"Starwhale Cloud\u5165\u95e8\u6307\u5357",permalink:"/zh/next/getting-started/cloud"}},s={},c=[{value:"\u5b89\u88c5Starwhale Server",id:"\u5b89\u88c5starwhale-server",level:2},{value:"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",id:"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee",level:2},{value:"\u767b\u5f55\u670d\u52a1\u5668",id:"\u767b\u5f55\u670d\u52a1\u5668",level:3},{value:"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee",id:"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee",level:3},{value:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6",id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6",level:2},{value:"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale Server",id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-server",level:2},{value:"\u4f7f\u7528Web UI\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30",id:"\u4f7f\u7528web-ui\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30",level:2}],p={toc:c};function u(e){let{components:t,...r}=e;return(0,n.kt)("wrapper",(0,a.Z)({},p,r,{components:t,mdxType:"MDXLayout"}),(0,n.kt)("h2",{id:"\u5b89\u88c5starwhale-server"},"\u5b89\u88c5Starwhale Server"),(0,n.kt)("p",null,"\u5b89\u88c5 Starwhale Server\uff0c\u53c2\u89c1",(0,n.kt)("a",{parentName:"p",href:"/zh/next/server/installation/"},"\u5b89\u88c5\u6307\u5357"),"\u3002"),(0,n.kt)("h2",{id:"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"},"\u521b\u5efa\u60a8\u7684\u7b2c\u4e00\u4e2a\u9879\u76ee"),(0,n.kt)("h3",{id:"\u767b\u5f55\u670d\u52a1\u5668"},"\u767b\u5f55\u670d\u52a1\u5668"),(0,n.kt)("p",null,"\u6253\u5f00\u6d4f\u89c8\u5668\u5e76\u5728\u5730\u5740\u680f\u4e2d\u8f93\u5165\u670d\u52a1\u5668\u7684 
URL\u3002 \u4f7f\u7528\u9ed8\u8ba4\u7528\u6237\u540d\uff08starwhale\uff09\u548c\u5bc6\u7801\uff08abcd1234\uff09\u767b\u5f55\u3002"),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-artifacts.gif",alt:"console-artifacts.gif"})),(0,n.kt)("h3",{id:"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee"},"\u521b\u5efa\u4e00\u4e2a\u65b0\u9879\u76ee"),(0,n.kt)("h2",{id:"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6"},"\u5728\u672c\u5730\u673a\u5668\u4e0a\u6784\u5efa\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6"),(0,n.kt)("p",null,"\u6309\u7167",(0,n.kt)("a",{parentName:"p",href:"standalone"},"Starwhale Standalone\u5165\u95e8\u6307\u5357"),"\u4e2d\u7684\u6b65\u9aa41\u5230\u6b65\u9aa44\u5728\u672c\u5730\u673a\u5668\u4e0a\u521b\u5efa\uff1a"),(0,n.kt)("ul",null,(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3ahelloworld\u7684Starwhale\u6a21\u578b"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3amnist64\u7684Starwhale\u6570\u636e\u96c6"),(0,n.kt)("li",{parentName:"ul"},"\u4e00\u4e2a\u540d\u4e3ahelloworld\u7684Starwhale\u8fd0\u884c\u65f6")),(0,n.kt)("h2",{id:"\u5c06\u6570\u636e\u96c6\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230starwhale-server"},"\u5c06\u6570\u636e\u96c6\u3001\u6a21\u578b\u548c\u8fd0\u884c\u65f6\u590d\u5236\u5230Starwhale Server"),(0,n.kt)("pre",null,(0,n.kt)("code",{parentName:"pre",className:"language-bash"},"swcli instance login --username --password --alias server \n\nswcli model copy helloworld server/project/demo\nswcli dataset copy mnist64 server/project/demo\nswcli runtime copy helloworld server/project/demo\n")),(0,n.kt)("h2",{id:"\u4f7f\u7528web-ui\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30"},"\u4f7f\u7528Web 
UI\u8fd0\u884c\u6a21\u578b\u8bc4\u4f30"),(0,n.kt)("p",null,"\u4f7f\u7528\u6d4f\u89c8\u5668\u6253\u5f00\u201cdemo\u201d\u9879\u76ee\u5e76\u521b\u5efa\u4e00\u4e2a\u65b0\u7684\u8bc4\u4f30\u3002"),(0,n.kt)("p",null,(0,n.kt)("img",{parentName:"p",src:"https://starwhale-examples.oss-cn-beijing.aliyuncs.com/docs/console-create-job.gif",alt:"console-create-job.gif"})),(0,n.kt)("p",null,(0,n.kt)("strong",{parentName:"p"},"\u606d\u559c\uff01 \u60a8\u5df2\u5b8c\u6210Starwhale Server\u7684\u5165\u95e8\u6307\u5357\u3002")))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/zh/assets/js/runtime~main.c6a269b9.js b/zh/assets/js/runtime~main.1c413bf6.js similarity index 98% rename from zh/assets/js/runtime~main.c6a269b9.js rename to zh/assets/js/runtime~main.1c413bf6.js index 2532a406f..60d5f47f3 100644 --- a/zh/assets/js/runtime~main.c6a269b9.js +++ b/zh/assets/js/runtime~main.1c413bf6.js @@ -1 +1 @@ -(()=>{"use strict";var e,c,b,a,d,f={},t={};function r(e){var c=t[e];if(void 0!==c)return c.exports;var b=t[e]={exports:{}};return f[e].call(b.exports,b,b.exports,r),b.exports}r.m=f,e=[],r.O=(c,b,a,d)=>{if(!b){var f=1/0;for(i=0;i=d)&&Object.keys(r.O).every((e=>r.O[e](b[o])))?b.splice(o--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[b,a,d]},r.n=e=>{var c=e&&e.__esModule?()=>e.default:()=>e;return r.d(c,{a:c}),c},b=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,a){if(1&a&&(e=this(e)),8&a)return e;if("object"==typeof e&&e){if(4&a&&e.__esModule)return e;if(16&a&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var f={};c=c||[null,b({}),b([]),b(b)];for(var t=2&a&&e;"object"==typeof t&&!~c.indexOf(t);t=b(t))Object.getOwnPropertyNames(t).forEach((c=>f[c]=()=>e[c]));return f.default=()=>e,r.d(d,f),d},r.d=(e,c)=>{for(var b in 
c)r.o(c,b)&&!r.o(e,b)&&Object.defineProperty(e,b,{enumerable:!0,get:c[b]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((c,b)=>(r.f[b](e,c),c)),[])),r.u=e=>"assets/js/"+({12:"72415513",16:"b9f79b01",53:"935f2afb",100:"8da59ea2",130:"7c31b07e",132:"d39000ab",133:"4a58b7c0",145:"cd72de2f",159:"c48b3342",215:"cf3566e4",234:"b4d07a96",295:"e1148302",313:"0da56a64",321:"38c59d5b",413:"e04d51ee",418:"9f7d30c4",461:"4ccb9248",467:"379b0c58",470:"50e13b3d",488:"731dc59d",538:"e43103c6",556:"e7ee4ae3",596:"9e437ba1",643:"d26bc519",676:"6ceeb001",704:"62ab62ad",721:"baae20c3",760:"1762fbf2",787:"6bf9f171",815:"7b18e4c0",834:"51d02626",863:"1d7100ea",873:"77e36de8",881:"ca1b85bb",919:"43a14e67",925:"2b086592",929:"1e84151d",962:"20d3256d",993:"543b187e",1004:"112df760",1045:"927d70e2",1056:"dffb6346",1080:"92fca8a9",1096:"5017fcbf",1112:"eed9c836",1119:"b9ef3ed8",1156:"6878fabc",1250:"b417d1c4",1263:"1cda5be9",1271:"1608ab83",1277:"3c1bdf30",1338:"270b7806",1377:"70d06172",1383:"7a9241bc",1422:"1beb5f05",1431:"bc8b0dd1",1493:"9e329834",1533:"d1ac0210",1536:"11371c7b",1687:"17c2f848",1692:"954cc45f",1704:"84b32dd0",1767:"86b1c339",1877:"881e0748",1895:"b16f049f",1908:"7181469a",1988:"9f58059d",2096:"8b23682e",2123:"fe87dcf0",2134:"80c5696e",2157:"f4c49af0",2166:"202d83c4",2235:"cd3a0e17",2263:"f313c30f",2281:"66428d6c",2313:"b879cbc2",2331:"910f4e82",2358:"f5714690",2378:"0782712a",2389:"e80f61e7",2411:"0a4c69ff",2440:"c1282265",2453:"96340789",2457:"403c571c",2493:"58f10d9f",2500:"4f907a97",2527:"2a61987d",2528:"d56f4e5c",2535:"814f3328",2558:"c61247c2",2648:"ba50f2ff",2669:"88e9a799",2692:"74f2bbca",2782:"415ee474",2812:"124d40e3",2821:"f9e0d34e",2849:"143cfaa9",2870:"7d88e7d2",2876:"d351013f",2902:"8053dc0f",2916:"15458654",2934:"5cde1b65",2957:"54156619",2972:"e60d5bd6",3089:"a6aa9e1f",3097:"a9712585",3150:"066b2a0b",3186:"73601713",3201:"014c7df5",3247:"6ed47448",3256:"a45d2ac1",3257:"eb21efd4",3321:"0dbd89b2",3348:"e762c649",3401:"a5e4b368",3459:"e92a8c5e",3521:"85
d06a42",3526:"f53bb910",3608:"9e4087bc",3648:"d493a5c5",3722:"3ccbc832",3754:"dff578d5",3760:"07c9474b",3902:"5c0fd647",3919:"5d3ff7ab",3973:"a5a06580",4001:"1446f4f7",4008:"0d5877a0",4013:"01a85c17",4037:"2fc4b8da",4046:"fdd53699",4124:"a9d1e7f6",4230:"327c535f",4236:"a34791fd",4242:"ab54f79f",4317:"c4cf3770",4347:"cc9603cd",4381:"13ed40da",4434:"578e670f",4467:"89d440ea",4590:"c44400de",4612:"92395540",4688:"1715a123",4696:"107ec5a7",4750:"94dccd29",4793:"55019ac7",4804:"e37f1ba3",4805:"73bbb900",4820:"3c5cb19f",4896:"7a8da0ce",4959:"59c16786",5018:"d8b4b029",5033:"ec22bf99",5034:"bfe407bc",5082:"21bbd13a",5115:"00bb1e00",5162:"07ff6a70",5205:"90b776a0",5237:"487043b0",5304:"8f1c0305",5309:"6e757243",5334:"b40c04b1",5363:"ab807b25",5394:"3a0f4a69",5397:"24e166e6",5434:"b19a7bfe",5466:"032a8bf9",5509:"a186c25a",5522:"d65d4688",5594:"d5b67f64",5631:"f597edc1",5635:"7c009c71",5665:"02693ce6",5672:"737eebb2",5673:"507190c7",5715:"40b162a0",5725:"f626a0b2",5737:"8ba06492",5833:"107adbf4",5885:"2644419a",5909:"bfb16af5",5917:"bc1c86ee",5926:"882f4d3d",5996:"3a601ba2",6002:"8c4bb804",6007:"94a33573",6041:"637eb1a0",6057:"bc4d1ce3",6072:"51f9b954",6076:"2ea24d9b",6103:"ccc49370",6108:"f05bb5a0",6165:"06fdd901",6174:"83e95488",6225:"536f76fa",6266:"eb7fea5c",6271:"9586d72e",6272:"8d1c02c8",6282:"9e0d0da6",6314:"af3c634e",6394:"d08e2a81",6406:"7c060db6",6424:"fd87e000",6467:"18fbe5ee",6488:"9f791bec",6494:"d609b4c3",6561:"b2073e66",6569:"6e888f0d",6575:"8960c193",6590:"321c6c64",6608:"6e93d602",6617:"c41bf2e2",6693:"d1a5f29f",6768:"c1c06e85",6808:"b123388a",6876:"6247c2cc",6894:"dc9acbda",6923:"5c28baca",6956:"4bc5f74d",6970:"96deed6f",6992:"5b17db12",7042:"dacd3dea",7091:"2dad1b44",7184:"318e02a0",7190:"8fa3b36e",7203:"11204aad",7235:"97879929",7298:"3752a250",7317:"3379d616",7336:"909ad827",7380:"4d8ebb6f",7389:"f3b9a48c",7475:"996e285a",7477:"eaa35b60",7486:"d41dfaf2",7490:"fbf0a0a7",7537:"51092b10",7544:"704d4994",7553:"3d27d96a",7622:"b770e4bf",7653:"5e0c78db",7686:"8e
dd5372",7723:"8fdf1696",7733:"5cbbb477",7756:"7f203d3a",7758:"5dae2df7",7775:"adaef9b8",7796:"28802b5e",7840:"b493ec7e",7856:"4dbb640a",7899:"45ba351f",7918:"17896441",7921:"a48dc77d",7938:"2eb13a6c",7955:"1d3d1868",7968:"3c0733f4",7976:"f74c1be1",7994:"1e1ca5c3",8e3:"6c71880e",8067:"6e77e7b8",8171:"d459d9f3",8212:"77a8db22",8233:"f5e09728",8271:"1c091541",8397:"a7f6c740",8435:"4124102e",8444:"c9d7c2f8",8446:"4d3733db",8525:"47b83913",8553:"8c932100",8610:"6875c492",8662:"4e5a481b",8690:"a62258e3",8744:"b7c0001e",8800:"3a332aed",8802:"ae41fcbe",8829:"76cec9f8",8834:"15cfd9ba",8862:"31aac386",8957:"8329e158",8963:"cf2d7334",8999:"c0d3eacc",9002:"d9f20a44",9004:"4c2eab68",9015:"bcec44d8",9037:"6c99aaf6",9089:"00f6213b",9132:"7d04adcf",9169:"01c62eef",9176:"37b7e04d",9204:"4f008f40",9297:"066306de",9305:"f8858eb0",9329:"1a11b136",9331:"8889abfd",9334:"247783bb",9371:"41971db3",9397:"1cee4349",9442:"11017be0",9447:"d706acb7",9513:"36907f35",9514:"1be78505",9523:"912b1c2d",9558:"c79576d7",9626:"69347124",9633:"ee9a0869",9673:"3d5161a6",9689:"b7ae29c0",9697:"53970ccf",9836:"17b2ed56",9869:"6d73ef2c",9915:"439c00c0",9929:"33bc9f06",9972:"8aaa1eea",9977:"b4922c9c"}[e]||e)+"."+{12:"d8872a49",16:"b65cf98b",53:"f1489cc9",100:"a702ba12",130:"4b8d3802",132:"e63c79af",133:"9c09623f",145:"f14b6708",159:"ca896a77",215:"c0374aca",234:"0cee5842",295:"e90ae55d",313:"0ad6b86b",321:"0d8da4c9",413:"349dff1a",418:"2fd84db5",461:"2144e990",467:"79c0fdd4",470:"cbd1abcc",488:"6d3b6168",538:"0b2dd499",556:"33abf59c",596:"43c7ccd3",643:"b71d5ae8",676:"d926f7df",704:"ce8e9343",721:"0d2d637e",760:"810bfcc8",787:"59ca4ce1",815:"935c1b04",834:"a45a547c",863:"8c334f49",873:"29f1d5c1",881:"d005c7d5",919:"951ea82a",925:"323d032c",929:"6ad3739e",962:"312abaae",993:"c7b8c6c8",1004:"b164d5a2",1045:"44b3d87f",1056:"785cd139",1080:"1edb98dd",1096:"3f2d4542",1112:"681a4c21",1119:"741f7520",1156:"ec4b2395",1250:"42df2345",1263:"d479a02e",1271:"a7934094",1277:"cbd77d01",1338:"08fbae57",1377:"448c5b22",1383:"
3a748658",1422:"96013b92",1431:"157cb260",1493:"b8a6cde2",1533:"efbd8c75",1536:"3be3ac5b",1687:"0ef58157",1692:"b7ccbda0",1704:"cd5b9120",1767:"53382bc2",1877:"35ce2638",1895:"a1e15660",1908:"f7eb0c8a",1988:"cc4710e9",2096:"1090cdf9",2123:"91316e7f",2134:"edfdff3d",2157:"74af8b0a",2166:"2cb27060",2235:"4adb1c6a",2263:"f4e51d51",2281:"3e7fac84",2313:"283d1cd1",2331:"7529f4bf",2358:"d8e3d9e2",2378:"4a777b00",2389:"56e5cac8",2411:"12866a64",2440:"9a5b64a7",2453:"fdfeed25",2457:"48594ca2",2493:"92134bdb",2500:"762311d9",2527:"62f16826",2528:"808540dc",2535:"96ccfae5",2558:"301dac69",2648:"082c3309",2669:"4b611d9e",2692:"fc24668b",2782:"b741e5f6",2812:"3a35840e",2821:"dc077a84",2849:"ed864c7d",2870:"da7f5571",2876:"9d510a98",2902:"2a7e386b",2916:"1b2f8580",2934:"ff8a3c0a",2957:"4d239f4e",2972:"e1213d74",3089:"63fb042a",3097:"a4039743",3150:"47f31dba",3186:"a266f556",3201:"79713ad8",3247:"bce95564",3256:"3799aae9",3257:"8f26899c",3321:"f3bdeac7",3348:"dbc87932",3401:"4bedf366",3459:"518e6303",3521:"63347d55",3526:"c8147363",3608:"b5df34c4",3648:"b6d0af57",3722:"09e3fdfd",3754:"e5882e0c",3760:"4273a0a9",3902:"d0625857",3919:"4efbebc3",3973:"e7578bf9",4001:"a2680011",4008:"647636be",4013:"3480387e",4037:"9cbef143",4046:"c1958c8b",4124:"81b022e4",4230:"7faa4767",4236:"947579e3",4242:"8a5ce29e",4317:"d598cb80",4347:"055d4814",4381:"ae186727",4434:"ac20ca5c",4467:"cc901cb4",4590:"9babec49",4612:"14d79fb0",4688:"5b398a5d",4696:"b2f65e4d",4750:"a807be9e",4793:"0a40e499",4804:"7bf107eb",4805:"3142425c",4820:"a079012a",4896:"6d3a0709",4959:"752c2be2",4972:"fcd66616",5018:"5d9c1fa7",5033:"201ac3d8",5034:"89bcc7e5",5082:"3b10bd92",5115:"3be8c6b6",5162:"74d600a5",5205:"348977ca",5237:"503bb9df",5304:"ea2b105d",5309:"af717168",5334:"e0ccd0ee",5363:"fe3bc966",5394:"38eefc16",5397:"2382a9f0",5434:"d11ae891",5466:"476b2c92",5509:"bbd01ee2",5522:"44155dae",5594:"2a2abff5",5631:"4f002de9",5635:"8f80d8ab",5665:"e4d8002e",5672:"1ade5d43",5673:"4cabdce6",5715:"30a9cb7f",5725:"823b9584",5737:"
dfa5d48a",5833:"ad29a42a",5885:"9af574de",5909:"43ff5e05",5917:"0807d186",5926:"8f8525f3",5996:"78cc09dc",6002:"cfec3c1c",6007:"c6306e92",6041:"400198c8",6048:"341e3f6b",6057:"a272f844",6072:"c345c2ce",6076:"ec398d05",6103:"6909e0f1",6108:"a4eb8638",6165:"ef602118",6174:"6d142f10",6225:"42f48da9",6266:"c167dad4",6271:"2d4f02fc",6272:"8c65c844",6282:"4a0697a4",6314:"043e7f92",6394:"4066fd6f",6406:"9a63d5aa",6424:"da2b477d",6467:"3232d289",6488:"de71920e",6494:"6294f46c",6561:"73fb34dc",6569:"638f7a1e",6575:"2d66ddfe",6590:"a9d73a94",6608:"299e34eb",6617:"f4381c61",6693:"c720efe4",6768:"47d38d56",6808:"c9f60e30",6876:"74ea4efe",6894:"368b1371",6923:"ed825a38",6956:"189512e9",6970:"94cfcefd",6992:"262abfc6",7042:"fa34098a",7091:"706d35e4",7184:"e71751db",7190:"4cb724eb",7203:"b70143e8",7235:"536c51e4",7298:"a69f966c",7317:"9d52f313",7336:"0a3efa00",7380:"c628a4e1",7389:"c00fcc70",7475:"bed73633",7477:"501e83b6",7486:"7d5e3c46",7490:"6e969508",7537:"9d50f7c1",7544:"c01f96a5",7553:"a70a143a",7622:"4127ad96",7653:"229a6599",7686:"5f90bc36",7723:"2e52302d",7733:"2ee3f0d7",7756:"d6ba1d3a",7758:"11fe8367",7775:"3108df84",7796:"73828cac",7840:"5a4a962c",7856:"fbf19b12",7899:"0f1ec799",7918:"df16294d",7921:"24268e33",7938:"c6aa864a",7955:"713aeb9f",7968:"0510af69",7976:"16d914ba",7994:"0fd19bed",8e3:"e28502c5",8067:"c84a9491",8171:"5b381734",8212:"0e58bb7c",8233:"3ed98bb2",8271:"05841d70",8357:"77417755",8397:"398fdaa5",8435:"e7cc53a9",8444:"151dc7c0",8446:"d5036718",8525:"e3c3e698",8553:"12ced5fd",8610:"14c8e2db",8662:"e064c1d5",8690:"40563929",8744:"60de814f",8800:"692bbec3",8802:"9f03e67f",8829:"28902d72",8834:"e833580a",8862:"338282ed",8957:"766a64ca",8963:"e55856b3",8999:"7f3d08b2",9002:"4636d9ca",9004:"db348610",9015:"7da7e2be",9037:"d8d7fdcf",9089:"5d2f9bd1",9132:"1b1be693",9169:"948cdc60",9176:"72ce3a4e",9204:"9eb25e48",9297:"daec0cdf",9305:"164a37a7",9329:"a6989350",9331:"12cf33c1",9334:"8199a693",9371:"2f7059c3",9397:"7ad8abe3",9442:"66280c6b",9447:"30272be9",9513:"3
3309716",9514:"fc89e98a",9523:"7dd368d5",9558:"75718aa3",9626:"be816e72",9633:"300a5532",9673:"f6aa3442",9689:"ab745631",9697:"e3acd7a3",9836:"0837226c",9869:"64068706",9915:"05649366",9929:"23b0a0be",9972:"a88db74a",9977:"8751bf98"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,c)=>Object.prototype.hasOwnProperty.call(e,c),a={},d="starwhale-docs:",r.l=(e,c,b,f)=>{if(a[e])a[e].push(c);else{var t,o;if(void 0!==b)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var d=a[e];if(delete a[e],t.parentNode&&t.parentNode.removeChild(t),d&&d.forEach((e=>e(b))),c)return c(b)},s=setTimeout(l.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=l.bind(null,t.onerror),t.onload=l.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/zh/",r.gca=function(e){return 
e={15458654:"2916",17896441:"7918",54156619:"2957",69347124:"9626",72415513:"12",73601713:"3186",92395540:"4612",96340789:"2453",97879929:"7235",b9f79b01:"16","935f2afb":"53","8da59ea2":"100","7c31b07e":"130",d39000ab:"132","4a58b7c0":"133",cd72de2f:"145",c48b3342:"159",cf3566e4:"215",b4d07a96:"234",e1148302:"295","0da56a64":"313","38c59d5b":"321",e04d51ee:"413","9f7d30c4":"418","4ccb9248":"461","379b0c58":"467","50e13b3d":"470","731dc59d":"488",e43103c6:"538",e7ee4ae3:"556","9e437ba1":"596",d26bc519:"643","6ceeb001":"676","62ab62ad":"704",baae20c3:"721","1762fbf2":"760","6bf9f171":"787","7b18e4c0":"815","51d02626":"834","1d7100ea":"863","77e36de8":"873",ca1b85bb:"881","43a14e67":"919","2b086592":"925","1e84151d":"929","20d3256d":"962","543b187e":"993","112df760":"1004","927d70e2":"1045",dffb6346:"1056","92fca8a9":"1080","5017fcbf":"1096",eed9c836:"1112",b9ef3ed8:"1119","6878fabc":"1156",b417d1c4:"1250","1cda5be9":"1263","1608ab83":"1271","3c1bdf30":"1277","270b7806":"1338","70d06172":"1377","7a9241bc":"1383","1beb5f05":"1422",bc8b0dd1:"1431","9e329834":"1493",d1ac0210:"1533","11371c7b":"1536","17c2f848":"1687","954cc45f":"1692","84b32dd0":"1704","86b1c339":"1767","881e0748":"1877",b16f049f:"1895","7181469a":"1908","9f58059d":"1988","8b23682e":"2096",fe87dcf0:"2123","80c5696e":"2134",f4c49af0:"2157","202d83c4":"2166",cd3a0e17:"2235",f313c30f:"2263","66428d6c":"2281",b879cbc2:"2313","910f4e82":"2331",f5714690:"2358","0782712a":"2378",e80f61e7:"2389","0a4c69ff":"2411",c1282265:"2440","403c571c":"2457","58f10d9f":"2493","4f907a97":"2500","2a61987d":"2527",d56f4e5c:"2528","814f3328":"2535",c61247c2:"2558",ba50f2ff:"2648","88e9a799":"2669","74f2bbca":"2692","415ee474":"2782","124d40e3":"2812",f9e0d34e:"2821","143cfaa9":"2849","7d88e7d2":"2870",d351013f:"2876","8053dc0f":"2902","5cde1b65":"2934",e60d5bd6:"2972",a6aa9e1f:"3089",a9712585:"3097","066b2a0b":"3150","014c7df5":"3201","6ed47448":"3247",a45d2ac1:"3256",eb21efd4:"3257","0dbd89b2":"3321",e762c649:"3348",a5e4b368:"3
401",e92a8c5e:"3459","85d06a42":"3521",f53bb910:"3526","9e4087bc":"3608",d493a5c5:"3648","3ccbc832":"3722",dff578d5:"3754","07c9474b":"3760","5c0fd647":"3902","5d3ff7ab":"3919",a5a06580:"3973","1446f4f7":"4001","0d5877a0":"4008","01a85c17":"4013","2fc4b8da":"4037",fdd53699:"4046",a9d1e7f6:"4124","327c535f":"4230",a34791fd:"4236",ab54f79f:"4242",c4cf3770:"4317",cc9603cd:"4347","13ed40da":"4381","578e670f":"4434","89d440ea":"4467",c44400de:"4590","1715a123":"4688","107ec5a7":"4696","94dccd29":"4750","55019ac7":"4793",e37f1ba3:"4804","73bbb900":"4805","3c5cb19f":"4820","7a8da0ce":"4896","59c16786":"4959",d8b4b029:"5018",ec22bf99:"5033",bfe407bc:"5034","21bbd13a":"5082","00bb1e00":"5115","07ff6a70":"5162","90b776a0":"5205","487043b0":"5237","8f1c0305":"5304","6e757243":"5309",b40c04b1:"5334",ab807b25:"5363","3a0f4a69":"5394","24e166e6":"5397",b19a7bfe:"5434","032a8bf9":"5466",a186c25a:"5509",d65d4688:"5522",d5b67f64:"5594",f597edc1:"5631","7c009c71":"5635","02693ce6":"5665","737eebb2":"5672","507190c7":"5673","40b162a0":"5715",f626a0b2:"5725","8ba06492":"5737","107adbf4":"5833","2644419a":"5885",bfb16af5:"5909",bc1c86ee:"5917","882f4d3d":"5926","3a601ba2":"5996","8c4bb804":"6002","94a33573":"6007","637eb1a0":"6041",bc4d1ce3:"6057","51f9b954":"6072","2ea24d9b":"6076",ccc49370:"6103",f05bb5a0:"6108","06fdd901":"6165","83e95488":"6174","536f76fa":"6225",eb7fea5c:"6266","9586d72e":"6271","8d1c02c8":"6272","9e0d0da6":"6282",af3c634e:"6314",d08e2a81:"6394","7c060db6":"6406",fd87e000:"6424","18fbe5ee":"6467","9f791bec":"6488",d609b4c3:"6494",b2073e66:"6561","6e888f0d":"6569","8960c193":"6575","321c6c64":"6590","6e93d602":"6608",c41bf2e2:"6617",d1a5f29f:"6693",c1c06e85:"6768",b123388a:"6808","6247c2cc":"6876",dc9acbda:"6894","5c28baca":"6923","4bc5f74d":"6956","96deed6f":"6970","5b17db12":"6992",dacd3dea:"7042","2dad1b44":"7091","318e02a0":"7184","8fa3b36e":"7190","11204aad":"7203","3752a250":"7298","3379d616":"7317","909ad827":"7336","4d8ebb6f":"7380",f3b9a48c:"7389","996e285a
":"7475",eaa35b60:"7477",d41dfaf2:"7486",fbf0a0a7:"7490","51092b10":"7537","704d4994":"7544","3d27d96a":"7553",b770e4bf:"7622","5e0c78db":"7653","8edd5372":"7686","8fdf1696":"7723","5cbbb477":"7733","7f203d3a":"7756","5dae2df7":"7758",adaef9b8:"7775","28802b5e":"7796",b493ec7e:"7840","4dbb640a":"7856","45ba351f":"7899",a48dc77d:"7921","2eb13a6c":"7938","1d3d1868":"7955","3c0733f4":"7968",f74c1be1:"7976","1e1ca5c3":"7994","6c71880e":"8000","6e77e7b8":"8067",d459d9f3:"8171","77a8db22":"8212",f5e09728:"8233","1c091541":"8271",a7f6c740:"8397","4124102e":"8435",c9d7c2f8:"8444","4d3733db":"8446","47b83913":"8525","8c932100":"8553","6875c492":"8610","4e5a481b":"8662",a62258e3:"8690",b7c0001e:"8744","3a332aed":"8800",ae41fcbe:"8802","76cec9f8":"8829","15cfd9ba":"8834","31aac386":"8862","8329e158":"8957",cf2d7334:"8963",c0d3eacc:"8999",d9f20a44:"9002","4c2eab68":"9004",bcec44d8:"9015","6c99aaf6":"9037","00f6213b":"9089","7d04adcf":"9132","01c62eef":"9169","37b7e04d":"9176","4f008f40":"9204","066306de":"9297",f8858eb0:"9305","1a11b136":"9329","8889abfd":"9331","247783bb":"9334","41971db3":"9371","1cee4349":"9397","11017be0":"9442",d706acb7:"9447","36907f35":"9513","1be78505":"9514","912b1c2d":"9523",c79576d7:"9558",ee9a0869:"9633","3d5161a6":"9673",b7ae29c0:"9689","53970ccf":"9697","17b2ed56":"9836","6d73ef2c":"9869","439c00c0":"9915","33bc9f06":"9929","8aaa1eea":"9972",b4922c9c:"9977"}[e]||e,r.p+r.u(e)},(()=>{var e={1303:0,532:0};r.f.j=(c,b)=>{var a=r.o(e,c)?e[c]:void 0;if(0!==a)if(a)b.push(a[2]);else if(/^(1303|532)$/.test(c))e[c]=0;else{var d=new Promise(((b,d)=>a=e[c]=[b,d]));b.push(a[2]=d);var f=r.p+r.u(c),t=new Error;r.l(f,(b=>{if(r.o(e,c)&&(0!==(a=e[c])&&(e[c]=void 0),a)){var d=b&&("load"===b.type?"missing":b.type),f=b&&b.target&&b.target.src;t.message="Loading chunk "+c+" failed.\n("+d+": "+f+")",t.name="ChunkLoadError",t.type=d,t.request=f,a[1](t)}}),"chunk-"+c,c)}},r.O.j=c=>0===e[c];var c=(c,b)=>{var a,d,f=b[0],t=b[1],o=b[2],n=0;if(f.some((c=>0!==e[c]))){for(a in 
t)r.o(t,a)&&(r.m[a]=t[a]);if(o)var i=o(r)}for(c&&c(b);n{"use strict";var e,c,b,a,d,f={},t={};function r(e){var c=t[e];if(void 0!==c)return c.exports;var b=t[e]={exports:{}};return f[e].call(b.exports,b,b.exports,r),b.exports}r.m=f,e=[],r.O=(c,b,a,d)=>{if(!b){var f=1/0;for(i=0;i=d)&&Object.keys(r.O).every((e=>r.O[e](b[o])))?b.splice(o--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[b,a,d]},r.n=e=>{var c=e&&e.__esModule?()=>e.default:()=>e;return r.d(c,{a:c}),c},b=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,a){if(1&a&&(e=this(e)),8&a)return e;if("object"==typeof e&&e){if(4&a&&e.__esModule)return e;if(16&a&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var f={};c=c||[null,b({}),b([]),b(b)];for(var t=2&a&&e;"object"==typeof t&&!~c.indexOf(t);t=b(t))Object.getOwnPropertyNames(t).forEach((c=>f[c]=()=>e[c]));return f.default=()=>e,r.d(d,f),d},r.d=(e,c)=>{for(var b in c)r.o(c,b)&&!r.o(e,b)&&Object.defineProperty(e,b,{enumerable:!0,get:c[b]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((c,b)=>(r.f[b](e,c),c)),[])),r.u=e=>"assets/js/"+({12:"72415513",16:"b9f79b01",53:"935f2afb",100:"8da59ea2",130:"7c31b07e",132:"d39000ab",133:"4a58b7c0",145:"cd72de2f",159:"c48b3342",215:"cf3566e4",234:"b4d07a96",295:"e1148302",313:"0da56a64",321:"38c59d5b",413:"e04d51ee",418:"9f7d30c4",461:"4ccb9248",467:"379b0c58",470:"50e13b3d",488:"731dc59d",538:"e43103c6",556:"e7ee4ae3",596:"9e437ba1",643:"d26bc519",676:"6ceeb001",704:"62ab62ad",721:"baae20c3",760:"1762fbf2",787:"6bf9f171",815:"7b18e4c0",834:"51d02626",863:"1d7100ea",873:"77e36de8",881:"ca1b85bb",919:"43a14e67",925:"2b086592",929:"1e84151d",962:"20d3256d",993:"543b187e",1004:"112df760",1045:"927d70e2",1056:"dffb6346",1080:"92fca8a9",1096:"5017fcbf",1112:"eed9c836",1119:"b9ef3ed8",1156:"6878fabc",1250:"b417d1c4",1263:"1cda5be9",1271:"1608ab83",1277:"3c1bdf30",1338:"270b7806",1377:"70d06172",1383:"7a9241bc",1422:"1beb5f05",1431:"bc8b0dd1",1493:"9e329834",1533:"d1ac0210",1536
:"11371c7b",1687:"17c2f848",1692:"954cc45f",1704:"84b32dd0",1767:"86b1c339",1877:"881e0748",1895:"b16f049f",1908:"7181469a",1988:"9f58059d",2096:"8b23682e",2123:"fe87dcf0",2134:"80c5696e",2157:"f4c49af0",2166:"202d83c4",2235:"cd3a0e17",2263:"f313c30f",2281:"66428d6c",2313:"b879cbc2",2331:"910f4e82",2358:"f5714690",2378:"0782712a",2389:"e80f61e7",2411:"0a4c69ff",2440:"c1282265",2453:"96340789",2457:"403c571c",2493:"58f10d9f",2500:"4f907a97",2527:"2a61987d",2528:"d56f4e5c",2535:"814f3328",2558:"c61247c2",2648:"ba50f2ff",2669:"88e9a799",2692:"74f2bbca",2782:"415ee474",2812:"124d40e3",2821:"f9e0d34e",2849:"143cfaa9",2870:"7d88e7d2",2876:"d351013f",2902:"8053dc0f",2916:"15458654",2934:"5cde1b65",2957:"54156619",2972:"e60d5bd6",3089:"a6aa9e1f",3097:"a9712585",3150:"066b2a0b",3186:"73601713",3201:"014c7df5",3247:"6ed47448",3256:"a45d2ac1",3257:"eb21efd4",3321:"0dbd89b2",3348:"e762c649",3401:"a5e4b368",3459:"e92a8c5e",3521:"85d06a42",3526:"f53bb910",3608:"9e4087bc",3648:"d493a5c5",3722:"3ccbc832",3754:"dff578d5",3760:"07c9474b",3902:"5c0fd647",3919:"5d3ff7ab",3973:"a5a06580",4001:"1446f4f7",4008:"0d5877a0",4013:"01a85c17",4037:"2fc4b8da",4046:"fdd53699",4124:"a9d1e7f6",4230:"327c535f",4236:"a34791fd",4242:"ab54f79f",4317:"c4cf3770",4347:"cc9603cd",4381:"13ed40da",4434:"578e670f",4467:"89d440ea",4590:"c44400de",4612:"92395540",4688:"1715a123",4696:"107ec5a7",4750:"94dccd29",4793:"55019ac7",4804:"e37f1ba3",4805:"73bbb900",4820:"3c5cb19f",4896:"7a8da0ce",4959:"59c16786",5018:"d8b4b029",5033:"ec22bf99",5034:"bfe407bc",5082:"21bbd13a",5115:"00bb1e00",5162:"07ff6a70",5205:"90b776a0",5237:"487043b0",5304:"8f1c0305",5309:"6e757243",5334:"b40c04b1",5363:"ab807b25",5394:"3a0f4a69",5397:"24e166e6",5434:"b19a7bfe",5466:"032a8bf9",5509:"a186c25a",5522:"d65d4688",5594:"d5b67f64",5631:"f597edc1",5635:"7c009c71",5665:"02693ce6",5672:"737eebb2",5673:"507190c7",5715:"40b162a0",5725:"f626a0b2",5737:"8ba06492",5833:"107adbf4",5885:"2644419a",5909:"bfb16af5",5917:"bc1c86ee",5926:"882f4d3d",5996
:"3a601ba2",6002:"8c4bb804",6007:"94a33573",6041:"637eb1a0",6057:"bc4d1ce3",6072:"51f9b954",6076:"2ea24d9b",6103:"ccc49370",6108:"f05bb5a0",6165:"06fdd901",6174:"83e95488",6225:"536f76fa",6266:"eb7fea5c",6271:"9586d72e",6272:"8d1c02c8",6282:"9e0d0da6",6314:"af3c634e",6394:"d08e2a81",6406:"7c060db6",6424:"fd87e000",6467:"18fbe5ee",6488:"9f791bec",6494:"d609b4c3",6561:"b2073e66",6569:"6e888f0d",6575:"8960c193",6590:"321c6c64",6608:"6e93d602",6617:"c41bf2e2",6693:"d1a5f29f",6768:"c1c06e85",6808:"b123388a",6876:"6247c2cc",6894:"dc9acbda",6923:"5c28baca",6956:"4bc5f74d",6970:"96deed6f",6992:"5b17db12",7042:"dacd3dea",7091:"2dad1b44",7184:"318e02a0",7190:"8fa3b36e",7203:"11204aad",7235:"97879929",7298:"3752a250",7317:"3379d616",7336:"909ad827",7380:"4d8ebb6f",7389:"f3b9a48c",7475:"996e285a",7477:"eaa35b60",7486:"d41dfaf2",7490:"fbf0a0a7",7537:"51092b10",7544:"704d4994",7553:"3d27d96a",7622:"b770e4bf",7653:"5e0c78db",7686:"8edd5372",7723:"8fdf1696",7733:"5cbbb477",7756:"7f203d3a",7758:"5dae2df7",7775:"adaef9b8",7796:"28802b5e",7840:"b493ec7e",7856:"4dbb640a",7899:"45ba351f",7918:"17896441",7921:"a48dc77d",7938:"2eb13a6c",7955:"1d3d1868",7968:"3c0733f4",7976:"f74c1be1",7994:"1e1ca5c3",8e3:"6c71880e",8067:"6e77e7b8",8171:"d459d9f3",8212:"77a8db22",8233:"f5e09728",8271:"1c091541",8397:"a7f6c740",8435:"4124102e",8444:"c9d7c2f8",8446:"4d3733db",8525:"47b83913",8553:"8c932100",8610:"6875c492",8662:"4e5a481b",8690:"a62258e3",8744:"b7c0001e",8800:"3a332aed",8802:"ae41fcbe",8829:"76cec9f8",8834:"15cfd9ba",8862:"31aac386",8957:"8329e158",8963:"cf2d7334",8999:"c0d3eacc",9002:"d9f20a44",9004:"4c2eab68",9015:"bcec44d8",9037:"6c99aaf6",9089:"00f6213b",9132:"7d04adcf",9169:"01c62eef",9176:"37b7e04d",9204:"4f008f40",9297:"066306de",9305:"f8858eb0",9329:"1a11b136",9331:"8889abfd",9334:"247783bb",9371:"41971db3",9397:"1cee4349",9442:"11017be0",9447:"d706acb7",9513:"36907f35",9514:"1be78505",9523:"912b1c2d",9558:"c79576d7",9626:"69347124",9633:"ee9a0869",9673:"3d5161a6",9689:"b7ae29c0",9697:
"53970ccf",9836:"17b2ed56",9869:"6d73ef2c",9915:"439c00c0",9929:"33bc9f06",9972:"8aaa1eea",9977:"b4922c9c"}[e]||e)+"."+{12:"d8872a49",16:"b65cf98b",53:"f1489cc9",100:"a702ba12",130:"4b8d3802",132:"e63c79af",133:"9c09623f",145:"f14b6708",159:"ca896a77",215:"c0374aca",234:"0cee5842",295:"e90ae55d",313:"0ad6b86b",321:"0d8da4c9",413:"349dff1a",418:"2fd84db5",461:"2144e990",467:"79c0fdd4",470:"cbd1abcc",488:"6d3b6168",538:"0b2dd499",556:"33abf59c",596:"43c7ccd3",643:"b71d5ae8",676:"d926f7df",704:"ce8e9343",721:"0d2d637e",760:"810bfcc8",787:"59ca4ce1",815:"935c1b04",834:"a45a547c",863:"8c334f49",873:"29f1d5c1",881:"d005c7d5",919:"951ea82a",925:"323d032c",929:"6ad3739e",962:"a7d310f4",993:"c7b8c6c8",1004:"b164d5a2",1045:"44b3d87f",1056:"785cd139",1080:"1edb98dd",1096:"3f2d4542",1112:"681a4c21",1119:"741f7520",1156:"ec4b2395",1250:"42df2345",1263:"d479a02e",1271:"a7934094",1277:"cbd77d01",1338:"08fbae57",1377:"448c5b22",1383:"3a748658",1422:"96013b92",1431:"157cb260",1493:"b8a6cde2",1533:"efbd8c75",1536:"3be3ac5b",1687:"0ef58157",1692:"b7ccbda0",1704:"cd5b9120",1767:"53382bc2",1877:"35ce2638",1895:"a1e15660",1908:"f7eb0c8a",1988:"cc4710e9",2096:"1090cdf9",2123:"91316e7f",2134:"edfdff3d",2157:"74af8b0a",2166:"2cb27060",2235:"4adb1c6a",2263:"f4e51d51",2281:"3e7fac84",2313:"283d1cd1",2331:"7529f4bf",2358:"d8e3d9e2",2378:"4a777b00",2389:"56e5cac8",2411:"12866a64",2440:"9a5b64a7",2453:"fdfeed25",2457:"48594ca2",2493:"92134bdb",2500:"762311d9",2527:"62f16826",2528:"808540dc",2535:"96ccfae5",2558:"301dac69",2648:"082c3309",2669:"4b611d9e",2692:"fc24668b",2782:"b741e5f6",2812:"3a35840e",2821:"dc077a84",2849:"ed864c7d",2870:"da7f5571",2876:"9d510a98",2902:"2a7e386b",2916:"1b2f8580",2934:"ff8a3c0a",2957:"4d239f4e",2972:"e1213d74",3089:"63fb042a",3097:"a4039743",3150:"47f31dba",3186:"a266f556",3201:"79713ad8",3247:"bce95564",3256:"3799aae9",3257:"8f26899c",3321:"f3bdeac7",3348:"dbc87932",3401:"4bedf366",3459:"518e6303",3521:"63347d55",3526:"c8147363",3608:"b5df34c4",3648:"b6d0af57",37
22:"09e3fdfd",3754:"e5882e0c",3760:"4273a0a9",3902:"d0625857",3919:"4efbebc3",3973:"e7578bf9",4001:"a2680011",4008:"647636be",4013:"3480387e",4037:"9cbef143",4046:"c1958c8b",4124:"81b022e4",4230:"7faa4767",4236:"947579e3",4242:"8a5ce29e",4317:"d598cb80",4347:"055d4814",4381:"ae186727",4434:"ac20ca5c",4467:"cc901cb4",4590:"9babec49",4612:"14d79fb0",4688:"5b398a5d",4696:"b2f65e4d",4750:"a807be9e",4793:"0a40e499",4804:"7bf107eb",4805:"3142425c",4820:"a079012a",4896:"6d3a0709",4959:"752c2be2",4972:"fcd66616",5018:"3b540579",5033:"201ac3d8",5034:"89bcc7e5",5082:"3b10bd92",5115:"3be8c6b6",5162:"74d600a5",5205:"348977ca",5237:"503bb9df",5304:"ea2b105d",5309:"af717168",5334:"e0ccd0ee",5363:"fe3bc966",5394:"38eefc16",5397:"2382a9f0",5434:"d11ae891",5466:"476b2c92",5509:"bbd01ee2",5522:"44155dae",5594:"2a2abff5",5631:"4f002de9",5635:"8f80d8ab",5665:"e4d8002e",5672:"1ade5d43",5673:"4cabdce6",5715:"30a9cb7f",5725:"823b9584",5737:"dfa5d48a",5833:"ad29a42a",5885:"9af574de",5909:"43ff5e05",5917:"0807d186",5926:"8f8525f3",5996:"78cc09dc",6002:"cfec3c1c",6007:"dc6c4873",6041:"400198c8",6048:"341e3f6b",6057:"a272f844",6072:"c345c2ce",6076:"ec398d05",6103:"6909e0f1",6108:"a4eb8638",6165:"ef602118",6174:"6d142f10",6225:"42f48da9",6266:"c167dad4",6271:"2d4f02fc",6272:"8c65c844",6282:"4a0697a4",6314:"043e7f92",6394:"4066fd6f",6406:"9a63d5aa",6424:"da2b477d",6467:"3232d289",6488:"de71920e",6494:"6294f46c",6561:"73fb34dc",6569:"638f7a1e",6575:"2d66ddfe",6590:"a9d73a94",6608:"299e34eb",6617:"f4381c61",6693:"c720efe4",6768:"47d38d56",6808:"c9f60e30",6876:"74ea4efe",6894:"368b1371",6923:"ed825a38",6956:"189512e9",6970:"94cfcefd",6992:"262abfc6",7042:"fa34098a",7091:"706d35e4",7184:"e71751db",7190:"4cb724eb",7203:"b70143e8",7235:"536c51e4",7298:"a69f966c",7317:"9d52f313",7336:"0a3efa00",7380:"c628a4e1",7389:"c00fcc70",7475:"bed73633",7477:"501e83b6",7486:"7d5e3c46",7490:"6e969508",7537:"9d50f7c1",7544:"c01f96a5",7553:"a70a143a",7622:"4127ad96",7653:"229a6599",7686:"5f90bc36",7723:"2e52302d",77
33:"2ee3f0d7",7756:"d6ba1d3a",7758:"11fe8367",7775:"3108df84",7796:"73828cac",7840:"5a4a962c",7856:"fbf19b12",7899:"0f1ec799",7918:"df16294d",7921:"24268e33",7938:"c6aa864a",7955:"713aeb9f",7968:"0510af69",7976:"16d914ba",7994:"0fd19bed",8e3:"e28502c5",8067:"c84a9491",8171:"5b381734",8212:"0e58bb7c",8233:"3ed98bb2",8271:"05841d70",8357:"77417755",8397:"398fdaa5",8435:"e7cc53a9",8444:"151dc7c0",8446:"d5036718",8525:"e3c3e698",8553:"12ced5fd",8610:"14c8e2db",8662:"e064c1d5",8690:"40563929",8744:"60de814f",8800:"692bbec3",8802:"9f03e67f",8829:"28902d72",8834:"e833580a",8862:"338282ed",8957:"766a64ca",8963:"e55856b3",8999:"7f3d08b2",9002:"4636d9ca",9004:"db348610",9015:"7da7e2be",9037:"d8d7fdcf",9089:"5d2f9bd1",9132:"1b1be693",9169:"948cdc60",9176:"72ce3a4e",9204:"9eb25e48",9297:"daec0cdf",9305:"164a37a7",9329:"a6989350",9331:"12cf33c1",9334:"8199a693",9371:"2f7059c3",9397:"7ad8abe3",9442:"66280c6b",9447:"30272be9",9513:"33309716",9514:"fc89e98a",9523:"7dd368d5",9558:"75718aa3",9626:"be816e72",9633:"300a5532",9673:"f6aa3442",9689:"ab745631",9697:"e3acd7a3",9836:"0837226c",9869:"64068706",9915:"05649366",9929:"23b0a0be",9972:"a88db74a",9977:"8751bf98"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,c)=>Object.prototype.hasOwnProperty.call(e,c),a={},d="starwhale-docs:",r.l=(e,c,b,f)=>{if(a[e])a[e].push(c);else{var t,o;if(void 0!==b)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var d=a[e];if(delete a[e],t.parentNode&&t.parentNode.removeChild(t),d&&d.forEach((e=>e(b))),c)return c(b)},s=setTimeout(l.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=l.bind(null,t.onerror),t.onload=l.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof 
Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/zh/",r.gca=function(e){return e={15458654:"2916",17896441:"7918",54156619:"2957",69347124:"9626",72415513:"12",73601713:"3186",92395540:"4612",96340789:"2453",97879929:"7235",b9f79b01:"16","935f2afb":"53","8da59ea2":"100","7c31b07e":"130",d39000ab:"132","4a58b7c0":"133",cd72de2f:"145",c48b3342:"159",cf3566e4:"215",b4d07a96:"234",e1148302:"295","0da56a64":"313","38c59d5b":"321",e04d51ee:"413","9f7d30c4":"418","4ccb9248":"461","379b0c58":"467","50e13b3d":"470","731dc59d":"488",e43103c6:"538",e7ee4ae3:"556","9e437ba1":"596",d26bc519:"643","6ceeb001":"676","62ab62ad":"704",baae20c3:"721","1762fbf2":"760","6bf9f171":"787","7b18e4c0":"815","51d02626":"834","1d7100ea":"863","77e36de8":"873",ca1b85bb:"881","43a14e67":"919","2b086592":"925","1e84151d":"929","20d3256d":"962","543b187e":"993","112df760":"1004","927d70e2":"1045",dffb6346:"1056","92fca8a9":"1080","5017fcbf":"1096",eed9c836:"1112",b9ef3ed8:"1119","6878fabc":"1156",b417d1c4:"1250","1cda5be9":"1263","1608ab83":"1271","3c1bdf30":"1277","270b7806":"1338","70d06172":"1377","7a9241bc":"1383","1beb5f05":"1422",bc8b0dd1:"1431","9e329834":"1493",d1ac0210:"1533","11371c7b":"1536","17c2f848":"1687","954cc45f":"1692","84b32dd0":"1704","86b1c339":"1767","881e0748":"1877",b16f049f:"1895","7181469a":"1908","9f58059d":"1988","8b23682e":"2096",fe87dcf0:"2123","80c5696e":"2134",f4c49af0:"2157","202d83c4":"2166",cd3a0e17:"2235",f313c30f:"2263","66428d6c":"2281",b879cbc2:"2313","910f4e82":"2331",f5714690:"2358","0782712a":"2378",e80f61e7:"2389","0a4c69ff":"2411",c1282265:"2440","403c571c":"2457","58f10d9f":"2493","4f907a97":"2500","2a61987d":"2527",d56f4e5c:"2528","814f3328":"2535",c61247c2:"2558",ba50f2ff:"2648","88e9a799":"2669","74f2bbca":"2692","415ee474":"2782","124d40e3":"2812",f9e0d34e:"2821","143cfaa9":"2849","7d88e7d2":"2870",d351013f:"2876","8053dc0f":"2902","5cde1b65":"2934",e60d
5bd6:"2972",a6aa9e1f:"3089",a9712585:"3097","066b2a0b":"3150","014c7df5":"3201","6ed47448":"3247",a45d2ac1:"3256",eb21efd4:"3257","0dbd89b2":"3321",e762c649:"3348",a5e4b368:"3401",e92a8c5e:"3459","85d06a42":"3521",f53bb910:"3526","9e4087bc":"3608",d493a5c5:"3648","3ccbc832":"3722",dff578d5:"3754","07c9474b":"3760","5c0fd647":"3902","5d3ff7ab":"3919",a5a06580:"3973","1446f4f7":"4001","0d5877a0":"4008","01a85c17":"4013","2fc4b8da":"4037",fdd53699:"4046",a9d1e7f6:"4124","327c535f":"4230",a34791fd:"4236",ab54f79f:"4242",c4cf3770:"4317",cc9603cd:"4347","13ed40da":"4381","578e670f":"4434","89d440ea":"4467",c44400de:"4590","1715a123":"4688","107ec5a7":"4696","94dccd29":"4750","55019ac7":"4793",e37f1ba3:"4804","73bbb900":"4805","3c5cb19f":"4820","7a8da0ce":"4896","59c16786":"4959",d8b4b029:"5018",ec22bf99:"5033",bfe407bc:"5034","21bbd13a":"5082","00bb1e00":"5115","07ff6a70":"5162","90b776a0":"5205","487043b0":"5237","8f1c0305":"5304","6e757243":"5309",b40c04b1:"5334",ab807b25:"5363","3a0f4a69":"5394","24e166e6":"5397",b19a7bfe:"5434","032a8bf9":"5466",a186c25a:"5509",d65d4688:"5522",d5b67f64:"5594",f597edc1:"5631","7c009c71":"5635","02693ce6":"5665","737eebb2":"5672","507190c7":"5673","40b162a0":"5715",f626a0b2:"5725","8ba06492":"5737","107adbf4":"5833","2644419a":"5885",bfb16af5:"5909",bc1c86ee:"5917","882f4d3d":"5926","3a601ba2":"5996","8c4bb804":"6002","94a33573":"6007","637eb1a0":"6041",bc4d1ce3:"6057","51f9b954":"6072","2ea24d9b":"6076",ccc49370:"6103",f05bb5a0:"6108","06fdd901":"6165","83e95488":"6174","536f76fa":"6225",eb7fea5c:"6266","9586d72e":"6271","8d1c02c8":"6272","9e0d0da6":"6282",af3c634e:"6314",d08e2a81:"6394","7c060db6":"6406",fd87e000:"6424","18fbe5ee":"6467","9f791bec":"6488",d609b4c3:"6494",b2073e66:"6561","6e888f0d":"6569","8960c193":"6575","321c6c64":"6590","6e93d602":"6608",c41bf2e2:"6617",d1a5f29f:"6693",c1c06e85:"6768",b123388a:"6808","6247c2cc":"6876",dc9acbda:"6894","5c28baca":"6923","4bc5f74d":"6956","96deed6f":"6970","5b17db12":"6992",dacd3dea:"
7042","2dad1b44":"7091","318e02a0":"7184","8fa3b36e":"7190","11204aad":"7203","3752a250":"7298","3379d616":"7317","909ad827":"7336","4d8ebb6f":"7380",f3b9a48c:"7389","996e285a":"7475",eaa35b60:"7477",d41dfaf2:"7486",fbf0a0a7:"7490","51092b10":"7537","704d4994":"7544","3d27d96a":"7553",b770e4bf:"7622","5e0c78db":"7653","8edd5372":"7686","8fdf1696":"7723","5cbbb477":"7733","7f203d3a":"7756","5dae2df7":"7758",adaef9b8:"7775","28802b5e":"7796",b493ec7e:"7840","4dbb640a":"7856","45ba351f":"7899",a48dc77d:"7921","2eb13a6c":"7938","1d3d1868":"7955","3c0733f4":"7968",f74c1be1:"7976","1e1ca5c3":"7994","6c71880e":"8000","6e77e7b8":"8067",d459d9f3:"8171","77a8db22":"8212",f5e09728:"8233","1c091541":"8271",a7f6c740:"8397","4124102e":"8435",c9d7c2f8:"8444","4d3733db":"8446","47b83913":"8525","8c932100":"8553","6875c492":"8610","4e5a481b":"8662",a62258e3:"8690",b7c0001e:"8744","3a332aed":"8800",ae41fcbe:"8802","76cec9f8":"8829","15cfd9ba":"8834","31aac386":"8862","8329e158":"8957",cf2d7334:"8963",c0d3eacc:"8999",d9f20a44:"9002","4c2eab68":"9004",bcec44d8:"9015","6c99aaf6":"9037","00f6213b":"9089","7d04adcf":"9132","01c62eef":"9169","37b7e04d":"9176","4f008f40":"9204","066306de":"9297",f8858eb0:"9305","1a11b136":"9329","8889abfd":"9331","247783bb":"9334","41971db3":"9371","1cee4349":"9397","11017be0":"9442",d706acb7:"9447","36907f35":"9513","1be78505":"9514","912b1c2d":"9523",c79576d7:"9558",ee9a0869:"9633","3d5161a6":"9673",b7ae29c0:"9689","53970ccf":"9697","17b2ed56":"9836","6d73ef2c":"9869","439c00c0":"9915","33bc9f06":"9929","8aaa1eea":"9972",b4922c9c:"9977"}[e]||e,r.p+r.u(e)},(()=>{var e={1303:0,532:0};r.f.j=(c,b)=>{var a=r.o(e,c)?e[c]:void 0;if(0!==a)if(a)b.push(a[2]);else if(/^(1303|532)$/.test(c))e[c]=0;else{var d=new Promise(((b,d)=>a=e[c]=[b,d]));b.push(a[2]=d);var f=r.p+r.u(c),t=new Error;r.l(f,(b=>{if(r.o(e,c)&&(0!==(a=e[c])&&(e[c]=void 0),a)){var d=b&&("load"===b.type?"missing":b.type),f=b&&b.target&&b.target.src;t.message="Loading chunk "+c+" failed.\n("+d+": 
"+f+")",t.name="ChunkLoadError",t.type=d,t.request=f,a[1](t)}}),"chunk-"+c,c)}},r.O.j=c=>0===e[c];var c=(c,b)=>{var a,d,f=b[0],t=b[1],o=b[2],n=0;if(f.some((c=>0!==e[c]))){for(a in t)r.o(t,a)&&(r.m[a]=t[a]);if(o)var i=o(r)}for(c&&c(b);n - + - + \ No newline at end of file diff --git a/zh/blog/index.html b/zh/blog/index.html index 2ea5f1141..de5d005d1 100644 --- a/zh/blog/index.html +++ b/zh/blog/index.html @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) 选择handler:运行对话模型,选择默认项:evaluation:chatbot 4) 选择运行时:选择默认项,内置 5) 高级配置,打开自动释放开关:可设置任务自动释放时长,达到设置时长,系统会自动取消任务运行。如不设置自动释放,您可以在体验完成后手动取消任务。

    点击提交即可运行模型

    image

    四. 查看运行结果和日志

    作业列表页可以查看项目中的所有作业。

    image

    点击作业ID ,进入任务详情页,点击查看日志可查看

    从任务提交到模型运行起来,总计用时5分04秒

    image

    运行成功后返回任务列表,点击终端按钮,可打开 chatbox 页面,在 chatbox 页面和 Llama 2-Chat 对话

    image

    image

    以上就是关于如何使用 Starwhale Cloud 运行 Llama 2-Chat 的说明,如果您在使用过程中有任何问题欢迎私信留言。您也可以通过Starwhale官网了解更多信息,感谢您的关注和支持。

    · 1 分钟阅读
    tianwei

    Starwhale是一个 MLOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。 它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    - + \ No newline at end of file diff --git a/zh/blog/intro-starwhale/index.html b/zh/blog/intro-starwhale/index.html index 7b709d47d..315b693af 100644 --- a/zh/blog/intro-starwhale/index.html +++ b/zh/blog/intro-starwhale/index.html @@ -10,13 +10,13 @@ - +

    Starwhale是什么?

    · 1 分钟阅读
    tianwei

    Starwhale是一个 MLOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。 它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    Starwhale帮您:

    • 跟踪您的训练/测试数据历史记录,包括所有数据项及其相关标签,以便您轻松访问它们。
    • 管理您可以在团队中共享的模型包。
    • 在不同的环境中运行您的模型,无论是在 Nvidia GPU 服务器上还是在嵌入式设备(如 Cherry Pi)上。
    • 为您的模型快速创建配备交互式 Web UI的在线服务。

    Starwhale是一个开放的平台,您可以创建插件来满足自己的需求。

    - + \ No newline at end of file diff --git a/zh/blog/reproduce-and-compare-evals/index.html b/zh/blog/reproduce-and-compare-evals/index.html index f08864c5b..a139f898d 100644 --- a/zh/blog/reproduce-and-compare-evals/index.html +++ b/zh/blog/reproduce-and-compare-evals/index.html @@ -10,13 +10,13 @@ - +

    如何复现评测结果

    · 4 分钟阅读

    Starwhale的开源大语言模型评测报告的评测结果或其他人的评测结果存疑,应该如何复现和对比评测结果?下文将为大家逐一讲解说明

    基本流程:登录账号 → 创建项目 → 运行评测 → 对比结果

    STEP1:登录账号

    需要登录Starwhale平台,点击跳转登录入口。如您尚未注册,可点击 注册入口 进行注册。

    STEP2:创建项目

    成功登录后进入项目列表页,点击右上角的 创建 项目按钮,输入项目名称,点击 提交 按钮即可新建一个项目。

    STEP3:运行评测

    进入评测列表页,点击右上角的 创建 评测按钮,并选择相应参数。

    例如想复现baichuan2-13b使用cmmlu数据集评测的结果,可参考以下内容进行操作:

    1. 选择运行资源,推荐选择资源:A10*24G*2;
    2. 选择模型:starwhale/llm-leaderboard/baichuan2-13b/atgoiscm(v1、latest);
    3. 选择handler:选择:src.evaluation:evaluation_results;
    4. 选择数据集:starwhale/llm-leaderboard/cmmlu/kiwtxza7(v1、latest);
    5. 选择运行时:starwhale/llm-leaderboard/llm-leaderboard/ickinf6q(v1、latest);
    6. 高级配置,关闭自动释放

    点击 提交 即可运行评测。评测运行时,可在评测详情页的任务TAB页点击 查看日志 了解评测运行情况;当评测状态为“成功”时,可在列表页和详情页查看评测结果。

    STEP4:对比结果

    进入报告列表页,点击右上角的 创建 报告按钮。

    报告提供富文本编辑能力,这里主要介绍如何将自己的评测结果和 Starwhale 或者其他的评测结果进行对比。

    1. 输入 报告标题、描述;
    2. 输入 / ,选择 Panel 选项;
    3. 点击 添加评测 按钮,选择评测所属的项目,如“llm-leaderboard”,然后勾选想要添加的评测,点击 添加 可将评测加入评测列表。支持跨项目添加评测,您可以添加多个您想对比的评测;
    4. 将想要进行对比的评测添加完成后:可点击 列管理 设置图标设置评测列表展示的字段及字段展示顺序;鼠标hover评测列表字段,可固定该列、或者按照升序降序进行排序;
    5. 可点击 添加图表 按钮 :选择图表类型,如 Bar Chart;添加 Metrics,如 accuracy相关指标(支持指标模糊搜索);输入 图表标题(非必填),点击 提交 即可将数据以条形图的方式展示,以便更直观得分析;
    6. 点击 发布到项目 按钮发布报告;
    7. 如想分享给其他人,进入 报告列表页 ,打开 分享 开关,获得报告链接的人即可浏览报告。

    reproduce and compare evals

    以上就是关于如何使用 Starwhale Cloud 复现和对比评测结果的说明,如果您在使用过程中有任何问题欢迎私信留言。您也可以通过Starwhale官网了解更多信息,感谢您的关注和支持。

    - + \ No newline at end of file diff --git a/zh/blog/run-llama2-chat-in-five-minutes/index.html b/zh/blog/run-llama2-chat-in-five-minutes/index.html index cc2dbbff8..e3ccadb8e 100644 --- a/zh/blog/run-llama2-chat-in-five-minutes/index.html +++ b/zh/blog/run-llama2-chat-in-five-minutes/index.html @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) 选择handler:运行对话模型,选择默认项:evaluation:chatbot 4) 选择运行时:选择默认项,内置 5) 高级配置,打开自动释放开关:可设置任务自动释放时长,达到设置时长,系统会自动取消任务运行。如不设置自动释放,您可以在体验完成后手动取消任务。

    点击提交即可运行模型

    image

    四. 查看运行结果和日志

    作业列表页可以查看项目中的所有作业。

    image

    点击作业ID ,进入任务详情页,点击查看日志可查看

    从任务提交到模型运行起来,总计用时5分04秒

    image

    运行成功后返回任务列表,点击终端按钮,可打开 chatbox 页面,在 chatbox 页面和 Llama 2-Chat 对话

    image

    image

    以上就是关于如何使用 Starwhale Cloud 运行 Llama 2-Chat 的说明,如果您在使用过程中有任何问题欢迎私信留言。您也可以通过Starwhale官网了解更多信息,感谢您的关注和支持。

    - + \ No newline at end of file diff --git a/zh/blog/tags/index.html b/zh/blog/tags/index.html index c095df471..8dd5d9f90 100644 --- a/zh/blog/tags/index.html +++ b/zh/blog/tags/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/blog/tags/intro/index.html b/zh/blog/tags/intro/index.html index dc3fabd92..262d0e94f 100644 --- a/zh/blog/tags/intro/index.html +++ b/zh/blog/tags/intro/index.html @@ -10,13 +10,13 @@ - +

    1 篇博文 含有标签「intro」

    查看所有标签

    · 1 分钟阅读
    tianwei

    Starwhale是一个 MLOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。 它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    - + \ No newline at end of file diff --git a/zh/blog/tags/llama-2/index.html b/zh/blog/tags/llama-2/index.html index 0081cb208..abbf4eb28 100644 --- a/zh/blog/tags/llama-2/index.html +++ b/zh/blog/tags/llama-2/index.html @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) 选择handler:运行对话模型,选择默认项:evaluation:chatbot 4) 选择运行时:选择默认项,内置 5) 高级配置,打开自动释放开关:可设置任务自动释放时长,达到设置时长,系统会自动取消任务运行。如不设置自动释放,您可以在体验完成后手动取消任务。

    点击提交即可运行模型

    image

    四. 查看运行结果和日志

    作业列表页可以查看项目中的所有作业。

    image

    点击作业ID ,进入任务详情页,点击查看日志可查看

    从任务提交到模型运行起来,总计用时5分04秒

    image

    运行成功后返回任务列表,点击终端按钮,可打开 chatbox 页面,在 chatbox 页面和 Llama 2-Chat 对话

    image

    image

    以上就是关于如何使用 Starwhale Cloud 运行 Llama 2-Chat 的说明,如果您在使用过程中有任何问题欢迎私信留言。您也可以通过Starwhale官网了解更多信息,感谢您的关注和支持。

    - + \ No newline at end of file diff --git "a/zh/blog/tags/\346\250\241\345\236\213/index.html" "b/zh/blog/tags/\346\250\241\345\236\213/index.html" index 0c13b3d5d..df231cd64 100644 --- "a/zh/blog/tags/\346\250\241\345\236\213/index.html" +++ "b/zh/blog/tags/\346\250\241\345\236\213/index.html" @@ -10,7 +10,7 @@ - + @@ -20,7 +20,7 @@ 3) 选择handler:运行对话模型,选择默认项:evaluation:chatbot 4) 选择运行时:选择默认项,内置 5) 高级配置,打开自动释放开关:可设置任务自动释放时长,达到设置时长,系统会自动取消任务运行。如不设置自动释放,您可以在体验完成后手动取消任务。

    点击提交即可运行模型

    image

    四. 查看运行结果和日志

    作业列表页可以查看项目中的所有作业。

    image

    点击作业ID ,进入任务详情页,点击查看日志可查看

    从任务提交到模型运行起来,总计用时5分04秒

    image

    运行成功后返回任务列表,点击终端按钮,可打开 chatbox 页面,在 chatbox 页面和 Llama 2-Chat 对话

    image

    image

    以上就是关于如何使用 Starwhale Cloud 运行 Llama 2-Chat 的说明,如果您在使用过程中有任何问题欢迎私信留言。您也可以通过Starwhale官网了解更多信息,感谢您的关注和支持。

    - + \ No newline at end of file diff --git "a/zh/blog/tags/\346\250\241\345\236\213\350\257\204\346\265\213/index.html" "b/zh/blog/tags/\346\250\241\345\236\213\350\257\204\346\265\213/index.html" index b43f61200..d3f33498e 100644 --- "a/zh/blog/tags/\346\250\241\345\236\213\350\257\204\346\265\213/index.html" +++ "b/zh/blog/tags/\346\250\241\345\236\213\350\257\204\346\265\213/index.html" @@ -10,13 +10,13 @@ - +

    1 篇博文 含有标签「模型评测」

    查看所有标签

    · 4 分钟阅读

    Starwhale的开源大语言模型评测报告的评测结果或其他人的评测结果存疑,应该如何复现和对比评测结果?下文将为大家逐一讲解说明

    基本流程:登录账号 → 创建项目 → 运行评测 → 对比结果

    STEP1:登录账号

    需要登录Starwhale平台,点击跳转登录入口。如您尚未注册,可点击 注册入口 进行注册。

    STEP2:创建项目

    成功登录后进入项目列表页,点击右上角的 创建 项目按钮,输入项目名称,点击 提交 按钮即可新建一个项目。

    STEP3:运行评测

    进入评测列表页,点击右上角的 创建 评测按钮,并选择相应参数。

    例如想复现baichuan2-13b使用cmmlu数据集评测的结果,可参考以下内容进行操作:

    1. 选择运行资源,推荐选择资源:A10*24G*2;
    2. 选择模型:starwhale/llm-leaderboard/baichuan2-13b/atgoiscm(v1、latest);
    3. 选择handler:选择:src.evaluation:evaluation_results;
    4. 选择数据集:starwhale/llm-leaderboard/cmmlu/kiwtxza7(v1、latest);
    5. 选择运行时:starwhale/llm-leaderboard/llm-leaderboard/ickinf6q(v1、latest);
    6. 高级配置,关闭自动释放

    点击 提交 即可运行评测。评测运行时,可在评测详情页的任务TAB页点击 查看日志 了解评测运行情况;当评测状态为“成功”时,可在列表页和详情页查看评测结果。

    STEP4:对比结果

    进入报告列表页,点击右上角的 创建 报告按钮。

    报告提供富文本编辑能力,这里主要介绍如何将自己的评测结果和 Starwhale 或者其他的评测结果进行对比。

    1. 输入 报告标题、描述;
    2. 输入 / ,选择 Panel 选项;
    3. 点击 添加评测 按钮,选择评测所属的项目,如“llm-leaderboard”,然后勾选想要添加的评测,点击 添加 可将评测加入评测列表。支持跨项目添加评测,您可以添加多个您想对比的评测;
    4. 将想要进行对比的评测添加完成后:可点击 列管理 设置图标设置评测列表展示的字段及字段展示顺序;鼠标hover评测列表字段,可固定该列、或者按照升序降序进行排序;
    5. 可点击 添加图表 按钮 :选择图表类型,如 Bar Chart;添加 Metrics,如 accuracy相关指标(支持指标模糊搜索);输入 图表标题(非必填),点击 提交 即可将数据以条形图的方式展示,以便更直观得分析;
    6. 点击 发布到项目 按钮发布报告;
    7. 如想分享给其他人,进入 报告列表页 ,打开 分享 开关,获得报告链接的人即可浏览报告。

    reproduce and compare evals

    以上就是关于如何使用 Starwhale Cloud 复现和对比评测结果的说明,如果您在使用过程中有任何问题欢迎私信留言。您也可以通过Starwhale官网了解更多信息,感谢您的关注和支持。

    - + \ No newline at end of file diff --git a/zh/cloud/billing/bills/index.html b/zh/cloud/billing/bills/index.html index 740d0f087..211f61bb0 100644 --- a/zh/cloud/billing/bills/index.html +++ b/zh/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    账单明细

    账单明细查看

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击"全部账单”,可跳转查看全部账单明细。

    image

    image

    账单明细字段说明

    • 账单编号:账单的唯一标识
    • 资源:用户所使用的各类资源
    • 资源明细:使用资源运行的作业
    • 消费时间:账单开始时间至账单结束时间
    • 计费项:用户所用的产品或服务所含的具体的计费项目
    • 单价:产品或服务的单价
    • 单价单位:产品或服务单价的单位
    • 用量:产品或服务的使用量
    • 用量单位:产品或服务使用量的单位
    • 状态:账单的支付状态,分为:未结清、已结清、未结算
    - + \ No newline at end of file diff --git a/zh/cloud/billing/index.html b/zh/cloud/billing/index.html index 987f1cbe5..196a7b54a 100644 --- a/zh/cloud/billing/index.html +++ b/zh/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    产品计费概述

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    计费说明

    计费项

    Starwhale 根据您选购的资源规格(CPU、GPU、内存)及使用时长进行计费。

    计费方式

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    按量付费主要按照资源计费周期计费,在每个结算周期生成账单并从账户中扣除相应费用。创建 Job 时,需要确定计算资源配置。

    请确保您在 Job 运行期间可用余额充足,如果在 Job 运行过程中,您的账户余额不足,会导致 Job 无法完成并按照已运行时长收费。

    开通要求

    按照按量付费创建 Job 前,您的 Starwhale 账户可用余额不得小于一个计费周期。

    说明:账户可用余额=充值金额+代金券金额-已消费金额-已退款金额-已冻结金额

    计费周期

    每5分钟为一个计费周期,不足5分钟则按5分钟计算,按照 Job 运行时长结算。

    计费时长

    从 Job 开始运行时计费,到 Job 运行结束后结束计费

    账单明细

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击"全部账单”,可跳转查看全部账单明细。详细操作流程请参见账单明细

    欠费说明

    如果账号内存在欠费账单,您无法继续使用计算资源。尽快充值结清欠费账单后可继续使用。

    查看欠费金额

    1 登录账户中心

    2 在账户概览可查看欠费金额

    退款说明

    现金余额支持退款

    需要登录账户中心,点击账户管理>充值订单,可退款的充值订单会在操作列显示退款按钮。点击可发起退款申请,详细操作流程请参见申请退款

    - + \ No newline at end of file diff --git a/zh/cloud/billing/recharge/index.html b/zh/cloud/billing/recharge/index.html index f403f0c96..efe9345ba 100644 --- a/zh/cloud/billing/recharge/index.html +++ b/zh/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    充值和退款

    充值渠道

    Starwhale目前支持通过微信渠道进行充值。

    充值操作步骤

    操作路径:

    1 登录账户中心,点击“去充值”,可跳转至充值页面。

    image

    2 选择或者输入充值金额,充值金额需要大于50元,同时注意支付渠道的限额(超过限额会无法支付成功)。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    充值订单

    查看充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,可在账户概览页面查看最近充值订单,点击“全部订单”,可跳转查看全部充值订单。

    image

    image

    继续支付充值订单

    如您在充值页面,点击“开始充值”后,因某些原因没有支付,可在30分钟内继续支付。

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要继续支付的订单,点击“继续支付”。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    取消充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要取消的订单,点击“取消”,弹出确认弹窗后,点击“是”,可取消充值订单。

    image

    - + \ No newline at end of file diff --git a/zh/cloud/billing/refund/index.html b/zh/cloud/billing/refund/index.html index fe93679c4..c6f82c55f 100644 --- a/zh/cloud/billing/refund/index.html +++ b/zh/cloud/billing/refund/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.6.4

    账户退款

    申请退款

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要退款的订单,点击“退款”,填写退款原因,确认退款金额,可申请退款。

    :::tip 退订款项将原路退回,即通过微信支付的订单会退回到支付使用的微信 :::

    image

    image

    image

    image

    - + \ No newline at end of file diff --git a/zh/cloud/billing/voucher/index.html b/zh/cloud/billing/voucher/index.html index 487ab36a4..d1c9c1c4f 100644 --- a/zh/cloud/billing/voucher/index.html +++ b/zh/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    代金券

    什么是代金券

    代金券是starwhale以虚拟券形式给予客户的资金类权益,可用于抵扣运行时所使用资源的费用。

    如何查看我的代金券?

    登录Starwhale,进入“账户中心>代金券” 可查看代金券的编号,面值,余额,状态等信息。

    image

    点击右侧操作列“使用明细”打开“代金券使用明细页”,查看该代金券的交易时间、编号、支出等抵扣详细记录。

    image

    如何使用代金券?

    代金券适用于抵扣消费,如果您的Starwhale账户内有代金券,系统会优先抵扣代金券金额,代金券余额为0后会抵扣充值余额。

    - + \ No newline at end of file diff --git a/zh/cloud/index.html b/zh/cloud/index.html index e4c20baa8..8f03c5675 100644 --- a/zh/cloud/index.html +++ b/zh/cloud/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/community/contribute/index.html b/zh/community/contribute/index.html index 7dfd670ca..c49eef976 100644 --- a/zh/community/contribute/index.html +++ b/zh/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 开源贡献指南

    参与贡献

    Starwhale 非常欢迎来自开源社区的贡献,包括但不限于以下方式:

    • 描述使用过程中的遇到的问题
    • 提交Feature Request
    • 参与Slack和Github Issues讨论
    • 参与Code Review
    • 改进文档和示例程序
    • 修复程序Bug
    • 增加Test Case
    • 改进代码的可读性
    • 开发新的Features
    • 编写Enhancement Proposal

    可以通过以下方式参与开发者社区,获取最新信息和联系Starwhale开发者:

    Starwhale社区使用Github Issues来跟踪问题和管理新特性的开发。可以选择"good first issue"或"help wanted"标签的issue,作为参与开发Starwhale的起点。

    Starwhale资源列表

    代码基本结构

    核心目录组织及功能说明如下:

    • client:swcli和Python SDK的实现,使用Python3编写,对应Starwhale Standalone Instance的所有功能。
      • api:Python SDK的接口定义和实现。
      • cli:Command Line Interface的入口点。
      • base:Python 端的一些基础抽象。
      • core:Starwhale 核心概念的实现,包括Dataset、Model、Runtime、Project、Job、Evaluation等。
      • utils:Python 端的一些工具函数。
    • console:前端的实现,使用React + TypeScript编写,对应Starwhale Cloud Instance的Web UI。
    • server:Starwhale Controller的实现,使用Java编写,对应Starwhale Cloud Instance的后端API。
    • docker:Helm Charts,绝大多数Docker Image的Dockerfile等。
    • docs:Starwhale官方文档。
    • example:示例程序,包含MNIST等例子。
    • scripts:一些Bash和Python脚本,用来进行E2E测试和软件发布等。

    Fork&Clone Starwhale仓库

    您需要fork Starwhale仓库代码并clone到本机,

    搭建针对Standalone Instance的本地开发环境

    Standalone Instance采用Python编写,当要修改Python SDK和swcli时,需要进行相应的环境搭建。

    Standalone本地开发环境前置条件

    • OS:Linux或macOS
    • Python:3.7~3.11
    • Docker:>=19.03 (非必须,当调试dockerize、生成docker image或采用docker为载体运行模型任务时需要)
    • Python隔离环境:Python venv 或 virtualenv 或 conda等都可以,用来构建一个隔离的Python环境

    从源码进行安装

    基于上一步clone到本地的仓库目录:starwhale,并进入到client子目录:

    cd starwhale/client

    使用Conda创建一个Starwhale开发环境,或者使用venv/virtualenv等创建:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    安装Client包及依赖到starwhale-dev环境中:

    make install-sw
    make install-dev-req

    输入swcli --version命令,观察是否安装成功,开发环境的swcli版本是 0.0.0.dev0

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ swcli --version
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    本地修改代码

    现在可以对Starwhale代码进行修改,不需要重复安装(make install-sw命令)就能在当前starwhale-dev环境中测试cli或sdk。Starwhale Repo中设置了 .editorconfig 文件,大部分IDE或代码编辑器会自动支持该文件的导入,采用统一的缩进设置。

    执行代码检查和测试

    starwhale 目录中操作,会执行单元测试、client的e2e测试、mypy检查、flake8检查和isort检查等。

    make client-all-check

    搭建针对Cloud Instance的本地开发环境

    Cloud Instance的后端采用Java编写,前端采用React+TypeScript编写,可以按需搭建相应的开发环境。

    搭建前端Console开发环境

    搭建后端Server开发环境

    • 开发语言:Java
    • 项目构建工具:Maven
    • 开发框架:Spring Boot+Mybatis
    • 测试框架:Junit5(其中mock框架为mockito,断言部分使用hamcrest,数据库、web服务等模拟使用Testcontainers)
    • 代码检查:使用maven插件 maven-checkstyle-plugin

    Server开发环境前置条件

    • OS:Linux、macOS或Windows
    • JDK: >=11
    • Docker:>=19.03
    • Maven:>=3.8.1
    • Mysql:>=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(如果没有k8s集群,可以使用Minikube作为开发调试时的备选方案)

    修改代码并增加单测

    现在可以进入到相应模块,对server端的代码进行修改、调整。其中业务功能代码位置为src/main/java,单元测试目录为src/test/java。

    执行代码检查和单元测试

    cd starwhale/server
    mvn clean package

    本地部署服务

    • 前置服务

      • Minikube(可选,无k8s集群时可使用此服务,安装方式可见:Minikube)

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d
        -p 9000:9000 --publish 9001:9001
        -e MINIO_DEFAULT_BUCKETS='starwhale'
        -e MINIO_ROOT_USER="minioadmin"
        -e MINIO_ROOT_PASSWORD="minioadmin"
        bitnami/minio:latest
    • 打包server程序

      若部署server端时,需要把前端同时部署上,可先执行前端部分的构建命令,然后执行'mvn clean package',则会自动将已编译好的前端文件打包进来。

      使用如下命令对程序进行打包:

      cd starwhale/server
      mvn clean package
    • 指定server启动所需的环境变量

      # Minio相关配置
      export SW_STORAGE_ENDPOINT=http://${Minio IP,默认为127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,默认为starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,默认为starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,默认为starwhale}
      export SW_STORAGE_REGION=${Minio region,默认为local}
      # kubernetes配置
      export KUBECONFIG=${.kube配置文件所在路径}\.kube\config

      export SW_INSTANCE_URI=http://${Server服务所在机器IP}:8082
      # Mysql相关配置
      export SW_METADATA_STORAGE_IP=${Mysql IP,默认为127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,默认为3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,默认为starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,默认为starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,默认为starwhale}
    • 部署server服务

      使用IDE或如下方式部署均可。

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • 功能调试

      这里有两种方式对修改的功能进行调试:

      • 使用swagger-ui进行接口调试,访问 /swagger-ui/index.html找到对应的api即可。
      • 或直接在ui访问,进行相应功能的调试(前提是打包时已经按说明将前端代码进行了提前构建)
    - + \ No newline at end of file diff --git a/zh/concepts/index.html b/zh/concepts/index.html index 55abc89a2..2b27568a0 100644 --- a/zh/concepts/index.html +++ b/zh/concepts/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/concepts/names/index.html b/zh/concepts/names/index.html index 916e30145..8c2c769e1 100644 --- a/zh/concepts/names/index.html +++ b/zh/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale中的命名规则

    下文的命名是指对Starwhale中的项目、模型、数据集、运行时以及版本标签进行命名。

    名称限制

    • 名称不区分大小写。
    • 名称必须仅由大小写字母“A-Z a-z”、数字“0-9”、连字符“-”、点“.”和下划线“_”组成。
    • 名称应始终以字母或“_”字符开头。
    • 名称的最大长度为80。

    名称唯一性要求

    • 资源名称在其所影响范围内必须是唯一的。例如,项目名称在实例中必须是唯一的,模型名称在其所在项目中必须是唯一的。
    • 同一个项目下同类资源必须使用不同的名称,包括那些已删除的资源。 例如,项目“Apple”不能有两个名为“Alice”的模型,即使其中一个已经被删除。
    • 不同种类的资源可以有相同的名称。 例如,一个项目、一个模型和一个数据集可以同时被命名为“Alice”。
    • 不同项目的资源可以具有相同的名称。 例如,“Apple”项目中的模型和“Banana”项目中的模型可以具有相同的名称“Alice”。
    • 已经被垃圾回收的资源名称可以重复使用。 例如,将项目“Apple”中名称为“Alice”的模型移除并进行垃圾回收后,该项目可以有一个新的同名模型“Alice”。
    - + \ No newline at end of file diff --git a/zh/concepts/project/index.html b/zh/concepts/project/index.html index d48387e7c..547ae5723 100644 --- a/zh/concepts/project/index.html +++ b/zh/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale中的项目

    “项目”是组织不同资源(如模型、数据集等)的基本单位。您可以将项目用于不同的目的。例如,您可以为数据科学家团队、产品线或特定模型创建项目。用户通常在日常工作中会参与一个或多个项目。

    Starwhale Server/Cloud 项目按账号分组。Starwhale Standalone 没有帐号概念。所以您不会在 Starwhale Standalone 项目中看到任何帐号前缀。Starwhale Server/Cloud项目可以是“公共”或“私有”。公共项目意味着同一实例上的所有用户在默认情况下都自动成为该项目的“访客”角色。有关角色的更多信息,请参阅Starwhale中的角色和权限

    Starwhale Standalone会自动创建一个“self”项目并将其配置为默认项目。

    - + \ No newline at end of file diff --git a/zh/concepts/roles-permissions/index.html b/zh/concepts/roles-permissions/index.html index bf1752ca9..7713af7d2 100644 --- a/zh/concepts/roles-permissions/index.html +++ b/zh/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale中的角色和权限

    角色用于为用户分配权限。只有Starwhale Server/Cloud有角色和权限,Starwhale Standalone没有相应概念。系统会自动创建一个管理员角色并分配给默认用户“starwhale”。一些敏感操作只能由具有管理员角色的用户执行,例如在Starwhale Server中创建新的账号。

    每个项目具有三类角色:

    • 管理员Admin - 项目管理员可以读写项目数据并将项目角色分配给用户。
    • 维护者Maintainer - 项目维护者可以读写项目数据。
    • 访客Guest - 项目访客只能读取项目数据。
    动作管理员Admin维护者Maintainer访客Guest
    管理项目成员
    编辑项目
    查看项目
    创建评价
    删除评价
    查看评价
    创建数据集
    更新数据集
    删除数据集
    查看数据集
    创建模型
    更新模型
    删除模型
    查看模型
    创建运行时
    更新运行时
    删除运行时
    查看运行时

    创建项目的用户成为第一个项目管理员。他可以在这之后将角色分配给其他用户。

    - + \ No newline at end of file diff --git a/zh/concepts/versioning/index.html b/zh/concepts/versioning/index.html index 94fa6899e..57260be49 100644 --- a/zh/concepts/versioning/index.html +++ b/zh/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale中的资源版本控制

    • Starwhale管理所有模型、数据集和运行时的历史记录。对特定资源的每次更新都会附加一个新版本的历史记录。
    • 版本由version id标识。version id是由 Starwhale自动生成的随机字符串,并按其创建时间排序。
    • 版本可以有标签。Starwhale使用版本标签来提供人性化的版本表示。默认情况下,Starwhale会为每个版本附加一个默认标签。默认标记是字母“v”后跟一个数字。对于每个版本化的资源,第一个版本标签始终标记为“v0”,第二个版本标记为“v1”,依此类推。有一个特殊的标签“latest”总是指向最新的版本。删除版本后,将不会重复使用其默认标签。例如,有一个带有标签“v0、v1、v2”的模型。 删除“v2”后,标签将为“v0、v1”。 接下来一个标签将是“v3”而不是“v2”。您可以将自己定义的标签附加到任何版本并随时删除它们。
    • Starwhale使用线性历史,不提供分支。
    • Starwhale资源无法真正回滚。当要恢复某个历史版本时,Starwhale会复制该版本数据并将其作为新版本追加到历史记录的末尾。您可以手动删除和恢复历史版本。
    - + \ No newline at end of file diff --git a/zh/dataset/index.html b/zh/dataset/index.html index 3c0a900d2..29c520e81 100644 --- a/zh/dataset/index.html +++ b/zh/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 数据集

    overview

    设计概述

    Starwhale Dataset 定位

    Starwhale Dataset 包含数据构建、数据加载和数据可视化三个核心阶段,是一款面向ML/DL领域的数据管理工具。Starwhale Dataset 能直接使用 Starwhale Runtime 构建的环境,能被 Starwhale Model 和 Starwhale Evaluation 无缝集成,是 Starwhale MLOps 工具链的重要组成部分。

    根据 Machine Learning Operations (MLOps): Overview, Definition, and Architecture 对MLOps Roles的分类,Starwhale Dataset的三个阶段针对用户群体如下:

    • 数据构建:Data Engineer、Data Scientist
    • 数据加载:Data Scientist、ML Developer
    • 数据可视化:Data Engineer、Data Scientist、ML Developer

    mlops-users

    核心功能

    • 高效加载:数据集原始文件存储在 OSS 或 NAS 等外部存储上,使用时按需加载,不需要数据落盘。
    • 简单构建:既支持从 Image/Video/Audio 目录、json文件和 Huggingface 数据集等来源一键构建数据集,又支持编写 Python 代码构建完全自定义的数据集。
    • 版本管理:可以进行版本追踪、数据追加等操作,并通过内部抽象的 ObjectStore,避免数据重复存储。
    • 数据集分发:通过 swcli dataset copy 命令,实现 Standalone 实例和 Cloud/Server 实例的双向数据集分享。
    • 数据可视化:Cloud/Server 实例的 Web 界面中可以对数据集提供多维度、多类型的数据呈现。
    • 制品存储:Standalone 实例能存储本地构建或分发的 swds 系列文件,Cloud/Server 实例使用对象存储提供集中式的 swds 制品存储。
    • Starwhale无缝集成Starwhale Dataset 能使用 Starwhale Runtime 构建的运行环境构建数据集。Starwhale EvaluationStarwhale Model 直接通过 --dataset 参数指定数据集,就能完成自动数据加载,便于进行推理、模型评测等环境。

    关键元素

    • swds 虚拟包文件:swds 与 swmp、swrt 不一样,不是一个打包的单一文件,而是一个虚拟的概念,具体指的是一个目录,是 Starwhale 数据集某个版本包含的数据集相关的文件,包括 _manifest.yaml, dataset.yaml, 数据集构建的Python脚本和数据文件的链接等。可以通过 swcli dataset info 命令查看swds所在目录。swds 是Starwhale Dataset 的简写。

    swds-tree.png

    • swcli dataset 命令行:一组dataset相关的命令,包括构建、分发和管理等功能,具体说明参考CLI Reference
    • dataset.yaml 配置文件:描述数据集的构建过程,可以完全省略,通过 swcli dataset build 参数指定,可以认为 dataset.yaml 是build命令行参数的一种配置文件表示方式。swcli dataset build 参数优先级高于 dataset.yaml
    • Dataset Python SDK:包括数据构建、数据加载和若干预定义的数据类型,具体说明参考Python SDK
    • 数据集构建的 Python 脚本:使用 Starwhale Python SDK 编写的用来构建数据集的一系列脚本。

    最佳实践

    Starwhale Dataset 的构建是独立进行的,如果编写构建脚本时需要引入第三方库,那么使用 Starwhale Runtime 可以简化 Python 的依赖管理,能保证数据集的构建可复现。Starwhale 平台会尽可能多的内建开源数据集,让用户 copy 下来数据集后能立即使用。

    命令行分组

    Starwhale Dataset 命令行从使用阶段的角度上,可以划分如下:

    • 构建阶段
      • swcli dataset build
    • 可视化阶段
      • swcli dataset diff
      • swcli dataset head
    • 分发阶段
      • swcli dataset copy
    • 基本管理
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    目前 Cloud/Server 实例中 Web UI 可以对数据集进行可视化展示,只有使用 Python SDK 的DataType 才能被前端正确的解释,映射关系如下:

    • Image:展示缩略图、放大图、MASK类型图片,支持 image/png、image/jpeg、image/webp、image/svg+xml、image/gif、image/apng、image/avif 格式。
    • Audio:展示为音频wave图,可播放,支持 audio/mp3、audio/wav 格式。
    • Video:展示为视频,可播放,支持 video/mp4、video/avi、video/webm 格式。
    • GrayscaleImage:展示灰度图,支持 x/grayscale 格式。
    • Text:展示文本,支持 text/plain 格式,可设置编码格式,默认为utf-8。
    • Binary和Bytes:暂不支持展示。
    • Link:上述几种多媒体类型都支持指定link作为存储路径。

    Starwhale Dataset 数据格式

    数据集由多个行组成,每行称为一个样本,每个样本包含若干 features ,features 是一个类 dict 结构,对key和value有一些简单的限制[L]:

    • dict的key必须为str类型。
    • dict的value必须是 int/float/bool/str/bytes/dict/list/tuple 等 Python 的基本类型,或者 Starwhale 内置的数据类型
    • 不同样本的数据相同key的value,不需要保持同一类型。
    • 如果value是list或者tuple,其元素的数据类型必须一致。
    • value为dict时,其限制等同于限制[L]

    例子:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    文件类数据的处理方式

    Starwhale Dataset 对文件类型的数据进行了特殊处理,如果您不关心 Starwhale 的实现方式,可以忽略本小节。

    根据实际使用场景,Starwhale Dataset 对基类为 starwhale.BaseArtifact 的文件类数据有两种处理方式:

    • swds-bin: Starwhale 以自己的二进制格式 (swds-bin) 将数据合并成若干个大文件,能高效的进行索引、切片和加载。
    • remote-link: 满足用户的原始数据存放在某些外部存储上,比如 OSS 或 NAS 等,原始数据较多,不方便搬迁或者已经用一些内部的数据集实现进行封装过,那么只需要在数据中使用 link,就能建立索引。

    在同一个Starwhale 数据集中,可以同时包含两种类型的数据。

    - + \ No newline at end of file diff --git a/zh/dataset/yaml/index.html b/zh/dataset/yaml/index.html index 7cf7485d6..5d9ebdafb 100644 --- a/zh/dataset/yaml/index.html +++ b/zh/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    dataset.yaml 使用指南

    提示

    dataset.yaml 对于 swcli dataset build 构建数据集的过程是非必要的。

    Starwhale Dataset 构建的时候使用 dataset.yaml,若省略 dataset.yaml,则可以在 swcli dataset build 命令行参数中描述相关配置,可以认为 dataset.yamlbuild 命令行的配置文件化表述。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Dataset的名字String
    handler为一个函数,返回一个Generator或一个可迭代的对象或一个实现 __iter__ 方法的类,格式为 {module 路径}:{类名或函数名}String
    desc数据集描述信息String""
    versiondataset.yaml格式版本,目前仅支持填写 1.0String1.0
    attr数据集构建参数Dict
    attr.volume_sizeswds-bin格式的数据集每个data文件的大小。当写数字时,单位bytes;也可以是数字+单位格式,如64M, 1GB等Int或Str64MB
    attr.alignment_sizeswds-bin格式的数据集每个数据块的数据alignment大小,如果设置alignment_size为4k,数据块大小为7.9K,则会补齐0.1K的空数据,让数据块为alignment_size的整数倍,提升page size等读取效率Integer或String128

    使用示例

    最简示例

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    名称为helloworld的数据集,使用dataset.yaml所在目录中dataset.py文件中的 ExampleProcessExecutor 类进行数据构建。

    MNIST数据集构建示例

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor

    desc: MNIST data and label test dataset

    attr:
    alignment_size: 1k
    volume_size: 4M

    handler为generator function的例子

    dataset.yaml 内容:

    name: helloworld
    handler: dataset:iter_item

    dataset.py 内容:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}

    本例中,handler为一个generator function,Starwhale SDK根据首个yield出来的元素为非Starwhale.Link类型,等同于继承 starwhale.SWDSBinBuildExecutor 类。

    - + \ No newline at end of file diff --git a/zh/evaluation/heterogeneous/node-able/index.html b/zh/evaluation/heterogeneous/node-able/index.html index 097533702..71ee16422 100644 --- a/zh/evaluation/heterogeneous/node-able/index.html +++ b/zh/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -24,7 +24,7 @@ 参考 链接

    v0.13.0-rc.1 为例

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    注意: 此操作会在所有的 K8s 节点中运行 NVIDIA 的 device plugin 插件, 如果之前配置过, 则会被更新, 请谨慎评估使用的镜像版本

  • 确认 GPU 可以在集群中发现和使用 参考下边命令, 查看 Jetson 节点的 Capacity 中有 nvidia.com/gpu, GPU 即被 K8s 集群正常识别

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • 制作和使用自定义镜像

    文章前面提到的 l4t-jetpack 镜像可以满足我们一般的使用, 如果我们需要自己定制更加精简或者更多功能的镜像, 可以基于 l4t-base 来制作 相关 Dockerfile 可以参考 Starwhale为mnist制作的镜像

    - + \ No newline at end of file diff --git a/zh/evaluation/heterogeneous/virtual-node/index.html b/zh/evaluation/heterogeneous/virtual-node/index.html index 9f63e4352..3b937a007 100644 --- a/zh/evaluation/heterogeneous/virtual-node/index.html +++ b/zh/evaluation/heterogeneous/virtual-node/index.html @@ -10,7 +10,7 @@ - + @@ -19,7 +19,7 @@ 此方案被各云厂商广泛用于 serverless 容器集群方案, 比如阿里云的 ASK, Amazon 的 AWS Fargate 等.

    原理

    virtual kubelet 框架将 kubelet 对于 Node 的相关接口进行实现, 只需要简单的配置即可模拟一个节点. 我们只需要实现 PodLifecycleHandler 接口即可支持:

    • 创建, 更新, 删除 Pod
    • 获取 Pod 状态
    • 获取 Container 日志

    将设备加入集群

    如果我们的设备由于资源限制等情况无法作为 K8s 的一个节点进行服务, 那么我们可以通过使用 virtual kubelet 模拟一个代理节点的方式对这些设备进行管理, Starwhale Controller 和设备的控制流如下


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    virtual kubelet 将 Starwhale Controller 下发下来的 Pod 编排信息转化为对设备的控制行为, 比如 ssh 到设备上执行一段命令, 或者通过 USB 或者串口发送一段消息等.

    下面是使用 virtual kubelet 的方式来对一个未加入集群的可以 ssh 的设备进行控制的示例

    1. 准备证书
    • 创建文件 csr.conf, 内容如下
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name
    [req_distinguished_name]
    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names
    [alt_names]
    IP = 1.2.3.4
    • 生成证书
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • 提交证书
    cat vklet.csr| base64 | tr -d "\n" # 输出内容作为 csr.yaml 文件中 spec.request 的内容

    csr.yaml

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************************************************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
     kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    现在我们得到了 vklet-cert.pem

    • 编译 virtual kubelet
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    创建节点的配置文件 mock.json

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    启动 virtual kubelet

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig

    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    至此, 我们使用 virtual kubelet 模拟了一个 100 core + 100G 内存的节点.

    • 增加 PodLifecycleHandler 的实现, 将 Pod 编排中的重要信息转化为 ssh 命令执行, 并且收集日志待 Starwhale Controller 收集

    具体实现可参考 ssh executor

    - + \ No newline at end of file diff --git a/zh/evaluation/index.html b/zh/evaluation/index.html index 298f223e6..4a380ad6f 100644 --- a/zh/evaluation/index.html +++ b/zh/evaluation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 模型评测

    设计概述

    Starwhale Evaluation 定位

    Starwhale Evaluation 目标是对模型评测进行全流程管理,包括创建 Job、分发 Task、查看模型评测报告和基本管理等。Starwhale Evaluation 是 Starwhale 构建的 MLOps 工具链使用 Starwhale Model、Starwhale Dataset、Starwhale Runtime 三个基础元素,在模型评测这个场景上的具体应用,后续还会包含 Starwhale Model Serving、Starwhale Training 等应用场景。

    核心功能

    • 可视化展示swcli和 Web UI都提供对模型评测结果的可视化展示,支持多个结果的对比等功能,同时用户可以自定义记录评测中间过程。
    • 多场景适配:不管是在笔记本的单机环境,还是在分布式服务器集群环境,都能使用统一的命令、Python脚本、制品和操作方法进行模型评测,满足不同算力、不同数据量的外部环境要求。
    • Starwhale无缝集成:使用Starwhale Runtime提供的运行环境,将 Starwhale Dataset 作为数据输入,在 Starwhale Model 中运行模型评测任务,不管是在 swcli、Python SDK 还是 Cloud/Server 实例 Web UI中,都能简单的进行配置。

    关键元素

    • swcli model run 命令行: 能够完成模型的批量、离线式评测。
    • swcli model serve 命令行: 能够完成模型的在线评测。

    最佳实践

    命令行分组

    从完成 Starwhale Evaluation 全流程任务的角度,可以将所涉及的命令分组如下:

    • 基础准备阶段
      • swcli dataset build 或 Starwhale Dataset Python SDK
      • swcli model build 或 Starwhale Model Python SDK
      • swcli runtime build
    • 评测阶段
      • swcli model run
      • swcli model serve
    • 结果展示阶段
      • swcli job info
    • 基本管理
      • swcli job list
      • swcli job remove
      • swcli job recover

    job-step-task 抽象

    • job: 一次模型评测任务就是一个 job,一个 job 包含一个或多个 step
    • step: step 对应评测过程中的某个阶段。使用PipelineHandler的默认评测过程,step就是predictevaluate;用户自定义的评测过程,step 就是使用 @handler, @evaluation.predict, @evaluation.evaluate 修饰的函数。step 之间可以有依赖关系,形成一个DAG。一个 step 包含一个或多个 task。同一 step 中的不同 task,执行逻辑是一致的,只是输入参数不同,常见做法是将数据集分割成若干部分,然后传入每个task 中,task 可以并行执行。
    • task: task 是最终运行的实体。在 Cloud/Server 实例中,一个 task 就是一个Pod的container; 在Standalone 实例中,一个 task 就是一个 Python Thread。

    job-step-task 的抽象是实现 Starwhale Evaluation 分布式运行的基础。

    - + \ No newline at end of file diff --git a/zh/faq/index.html b/zh/faq/index.html index bf842e828..d756ebe75 100644 --- a/zh/faq/index.html +++ b/zh/faq/index.html @@ -10,13 +10,13 @@ - +
    - + \ No newline at end of file diff --git a/zh/getting-started/cloud/index.html b/zh/getting-started/cloud/index.html index 3da348342..541867670 100644 --- a/zh/getting-started/cloud/index.html +++ b/zh/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale Cloud入门指南

    Starwhale Cloud运行在阿里云上,域名是 https://cloud.starwhale.cn ,后续我们会推出部署在AWS上的 https://cloud.starwhale.ai 服务,需要注意的是,这是两个相互独立的实例,帐户和数据不共享。您可以选择任何一个开始。

    在开始之前,您需要先安装Starwhale Client(swcli)

    注册Starwhale Cloud并创建您的第一个项目

    您可以直接使用自己的GitHub或微信帐号登录,也可以注册一个新的帐号。如果您使用 GitHub 或 微信帐号登录,系统会要求您提供用户名。

    然后您可以创建一个新项目。在本教程中,我们将使用名称 demo 作为项目名称。

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    登录云实例

    swcli instance login --username <您的用户名> --password <您的密码> --alias swcloud https://cloud.starwhale.cn

    将数据集、模型和运行时复制到Starwhale Cloud

    swcli model copy mnist swcloud/project/demo
    swcli dataset copy mnist swcloud/project/demo
    swcli runtime copy pytorch swcloud/project/demo

    使用 Web UI 运行评估

    console-create-job.gif

    恭喜! 您已完成Starwhale Cloud的入门指南。

    - + \ No newline at end of file diff --git a/zh/getting-started/index.html b/zh/getting-started/index.html index 56a826bd1..25b26c4ab 100644 --- a/zh/getting-started/index.html +++ b/zh/getting-started/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    入门指南

    首先,您需要安装Starwhale Client(swcli),可以运行如下命令:

    python3 -m pip install starwhale

    更多详细信息请参阅swcli安装指南

    根据您使用的实例类型,您可以参考以下三个入门指南:

    • Starwhale Standalone入门指南 - 本指南可帮助您在台式PC/笔记本电脑上运行一个MNIST评估。这是开始使用Starwhale最快最简单的方法。
    • Starwhale Server入门指南 - 本指南可帮助您在私有服务器上安装Starwhale Server并运行一个MNIST评估。在本指南结束时,您将拥有一个Starwhale Server实例,您可以在其中管理您的数据集和模型。
    • Starwhale Cloud入门指南 - 本指南可帮助您在Starwhale Cloud上创建帐户并运行MNIST评估。这是体验所有Starwhale功能的最简单方法。
    - + \ No newline at end of file diff --git a/zh/getting-started/runtime/index.html b/zh/getting-started/runtime/index.html index ade04dfbe..3803a5797 100644 --- a/zh/getting-started/runtime/index.html +++ b/zh/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale Runtime入门指南

    本文演示如何搭建Pytorch环境的Starwhale Runtime以及如何在不同环境中使用它。该runtime可以满足Starwhale中六个例子的依赖需求:mnist、speech commands、nmt、cifar10、ag_news、PennFudan。相关代码链接:example/runtime/pytorch

    您可以从本教程中学到以下内容:

    • 如何构建Starwhale Runtime。
    • 如何在不同场景下使用Starwhale Runtime。
    • 如何发布Starwhale Runtime。

    前置条件

    基础环境

    运行以下命令以克隆示例代码:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch-cn-mirror #非中国大陆网络可使用pytorch例子

    构建Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    在Standalone Instance中使用Starwhale Runtime

    在shell中使用Starwhale Runtime

    # 激活runtime
    swcli runtime activate pytorch-cn-mirror

    swcli runtime activate会下载runtime的所有python依赖,并在当前shell环境中激活该环境。这个过程可能需要很长时间。

    当runtime被激活时,所有依赖项都已在您的python环境中准备就绪,类似于virtualenv的source venv/bin/activate或者conda的conda activate命令。如果您关闭了shell或切换到另一个shell,则下次使用之前需要重新激活这个runtime。

    在swcli中使用Starwhale Runtime

    # 模型构建中使用runtime
    swcli model build . --runtime pytorch-cn-mirror
    # 数据集构建中使用runtime
    swcli dataset build . --runtime pytorch-cn-mirror
    # 模型评测中使用runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch-cn-mirror

    将 Starwhale Runtime 复制到另一个实例

    您可以将运行时复制到Server/Cloud实例,然后可以在Server/Cloud实例中使用或由其他用户下载。

    # 将runtime复制到名为“pre-k8s”的Server实例
    ❯ swcli runtime copy pytorch-cn-mirror cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/zh/getting-started/server/index.html b/zh/getting-started/server/index.html index 2e5390f4f..1cde452a1 100644 --- a/zh/getting-started/server/index.html +++ b/zh/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale Server入门指南

    安装Starwhale Server

    安装 Starwhale Server,参见安装指南

    创建您的第一个项目

    登录服务器

    打开浏览器并在地址栏中输入服务器的 URL。 使用默认用户名(starwhale)和密码(abcd1234)登录。

    console-artifacts.gif

    创建一个新项目

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    将数据集、模型和运行时复制到Starwhale Server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    使用Web UI运行模型评估

    使用浏览器打开“demo”项目并创建一个新的评估。

    console-create-job.gif

    恭喜! 您已完成Starwhale Server的入门指南。

    - + \ No newline at end of file diff --git a/zh/getting-started/standalone/index.html b/zh/getting-started/standalone/index.html index 818397e22..bfbf2608c 100644 --- a/zh/getting-started/standalone/index.html +++ b/zh/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale Standalone入门指南

    Starwhale Client(swcli)安装完成后,您就可以使用Starwhale Standalone。

    我们也提供对应的Jupyter Notebook例子,可以在 Google Colab 或本地的 vscode/jupyterlab 中试用。

    下载例子

    通过以下方式克隆Starwhale项目来下载Starwhale示例:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    为了节省例子的下载时间,我们执行git clone命令时,忽略了git-lfs,并只保留最近一次的commit信息。我们选用ML/DL领域的HelloWorld程序-MNIST来介绍如何从零开始构建数据集、模型包和运行环境,并最终完成模型评测。接下来的操作都在 starwhale 目录中进行。

    核心工作流程

    构建 Pytorch 运行时

    运行时示例代码位于example/runtime/pytorch目录中。

    • 构建Starwhale运行时包:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      提示

      当首次构建Starwhale Runtime时,由于需要创建venv或conda隔离环境,并下载相关的Python依赖,命令执行需要花费一段时间。时间长短取决与所在机器的网络情况和runtime.yaml中Python依赖的数量。建议合理设置机器的 ~/.pip/pip.conf 文件,填写缓存路径和适合当前网络环境的pypi mirror地址。

      处于中国大陆网络环境中的用户,可以参考如下配置:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • 检查您本地的Starwhale运行时:

      swcli runtime list
      swcli runtime info pytorch

    构建模型

    模型示例代码位于 example/mnist 目录中。

    • 下载预训练模型文件:

      cd example/mnist
      CN=1 make download-model
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建一个Starwhale模型:

      swcli model build example/mnist --runtime pytorch
    • 检查您本地的Starwhale模型:

      swcli model list
      swcli model info mnist

    构建数据集

    数据集示例代码位于 example/mnist 目录中。

    • 下载MNIST原始数据:

      cd example/mnist
      CN=1 make download-data
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建Starwhale数据集:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • 检查您本地的Starwhale数据集:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    运行评估作业

    • 创建评估工作

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • 检查评估结果

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    恭喜! 您已完成Starwhale Standalone的入门指南。

    - + \ No newline at end of file diff --git a/zh/index.html b/zh/index.html index 0a522e60b..5604d4f4f 100644 --- a/zh/index.html +++ b/zh/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    什么是Starwhale

    概述

    Starwhale是一个 MLOps/LLMOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    Starwhale能够帮助您:

    • 跟踪您的训练/测试数据集历史记录,包括所有数据项及其相关标签,以便您轻松访问它们。
    • 管理您可以在团队中共享的模型包。
    • 在不同的环境中运行您的模型,无论是在 Nvidia GPU服务器上还是在嵌入式设备(如 Cherry Pi)上。
    • 为您的模型快速创建配备交互式 Web UI的在线服务。

    同时,Starwhale 是一个开放的平台,您可以创建插件来满足自己的需求。

    部署选项

    Starwhale的每个部署称为一个实例。所有实例都可以通过Starwhale Client(swcli)进行管理。

    您可以任选以下实例类型之一开始使用:

    • Starwhale Standalone - Starwhale Standalone 本质上是一套存储在本地文件系统中的数据库。它由 Starwhale Client(swcli)创建和管理。您只需安装 swcli 即可使用。目前,一台机器上的每个用户只能拥有一个Starwhale Standalone 实例。我们建议您使用 Starwhale Standalone 来构建和测试您的数据集和模型,然后再将它们推送到 Starwhale Server/Cloud 实例。
    • Starwhale Server - Starwhale Server 是部署在您本地服务器上的服务。除了 Starwhale Client(swcli)的文本交互界面,Starwhale Server还提供 Web UI供您管理数据集和模型,以及在Kubernetes集群中运行模型并查看运行结果。
    • Starwhale Cloud - Starwhale Cloud 是托管在公共云上的服务。 通过在https://cloud.starwhale.cn注册一个账号,您就可以使用Starwhale,而无需安装、运行和维护您自己的实例。 Starwhale Cloud 还提供公共资源供您下载,例如一些流行的开源数据集、模型和运行时。查看 Starwhale Cloud 实例上的 “starwhale/public”项目以获取更多详细信息。

    在您决定要使用的实例类型时,请考虑以下因素:

    实例类型部署位置维护者用户界面可扩展性
    Starwhale Standalone您的笔记本电脑或本地服务器不需要命令行不可扩展
    Starwhale Server您的数据中心您自己Web UI和命令行可扩展,取决于您的 Kubernetes 集群
    Starwhale Cloud公共云,如AWS或阿里云Starwhale团队Web UI和命令行可扩展,但目前受到云上免费可用资源的限制
    - + \ No newline at end of file diff --git a/zh/model/index.html b/zh/model/index.html index db6f935fc..9b0808dac 100644 --- a/zh/model/index.html +++ b/zh/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 模型

    overview

    Starwhale 模型是一种机器学习模型的标准包格式,可用于多种用途,例如模型微调、模型评估和在线服务。 Starwhale 模型包含模型文件、推理代码、配置文件等等。

    创建一个 Starwhale 模型

    创建 Starwhale 模型有两种方法:通过 swcli 或通过 SDK

    使用 swcli 创建 Starwhale 模型

    使用 swcli 创建 Starwhale 模型之前,您可以定义一个model.yaml,其中描述了关于Starwhale模型的一些必要信息,然后运行以下命令:

    swcli model build . --model-yaml /path/to/model.yaml

    有关该命令和 model.yaml 的更多信息,请参阅swcli参考。需要注意的是,model.yaml 是非必要的。

    使用 Python SDK 创建 Starwhale 模型

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    管理 Starwhale 模型

    使用 swcli 管理 Starwhale 模型

    命令说明
    swcli model list列出项目中所有Starwhale模型
    swcli model info显示有关Starwhale模型的详细信息
    swcli model copy将Starwhale模型复制到另一个位置
    swcli model remove删除Starwhale模型
    swcli model recover恢复之前删除的Starwhale模型

    使用 Web 界面管理 Starwhale 模型

    管理 Starwhale 模型的历史版本

    Starwhale 模型是版本化的。关于版本的基本信息可以参考 Starwhale中的资源版本控制

    使用 swcli 管理 Starwhale 模型的历史版本

    命令说明
    swcli model history列出Starwhale模型的所有版本
    swcli model info显示某个Starwhale模型版本的详细信息
    swcli model diff比较两个版本的Starwhale模型
    swcli model copy复制某个Starwhale模型版本到新的版本
    swcli model remove删除某个Starwhale模型版本
    swcli model recover恢复以前删除的Starwhale模型版本

    模型评估

    使用swcli进行模型评估

    命令说明
    swcli model run指定某个Starwhale模型进行模型评估

    存储格式

    Starwhale模型是一个打包了原始目录的tar文件。

    - + \ No newline at end of file diff --git a/zh/model/yaml/index.html b/zh/model/yaml/index.html index b785aafe4..876b79a27 100644 --- a/zh/model/yaml/index.html +++ b/zh/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    model.yaml 使用指南

    提示

    model.yaml 对于 swcli model build 构建模型的过程是非必要的。

    Starwhale Model 构建时,若使用 swcli model build 命令,可以通过 --model-yaml 参数指定符合特定格式的yaml文件,简化模型构建的参数指定。

    即使不指定 --model-yaml 参数,swcli model build 也会自动寻找 ${workdir} 目录下的 model.yaml 文件,会提取其中的参数。swcli model build 命令行中指定参数优先级大于 model.yaml 中的等价配置,可以认为 model.yaml 是 build 命令行的配置文件化表述。

    当使用 Python SDK 方式构建 Starwhale 模型时,model.yaml 文件不生效。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Model 的名字,等价于 --name 参数。String
    run.modules模型构建时搜索的Python Modules,可以指定多个模型运行的入口点,格式为 Python 可 Imported 路径。等价于 --module 参数。List[String]
    run.handlerrun.modules的曾用写法,只能指定一个模型运行的入口点,已废弃String
    versionmodel.yaml格式版本,目前仅支持填写 1.0String1.0
    desc模型描述信息,等价于 --desc 参数。String

    使用示例

    name: helloworld
    run:
    modules:
    - src.evaluator
    desc: "example yaml"

    名称为 helloworld 的 Starwhale 模型,搜索 swcli model build {WORKDIR} 命令中 ${WORKDIR} 目录相对的 src/evaluator.py 文件中被 @evaluation.predict, @evaluation.evaluate 和 @handler 修饰的函数, 或继承自 PipelineHandler 的类,这些函数或类会被加入 Starwhale 模型可运行的入口点列表中,在 swcli model run 或 Web UI 运行时,选择对应的入口点(handler)运行模型。

    model.yaml 是非必要的,yaml 中定义参数可以在 swcli 命令行参数中指定。

    swcli model build . --model-yaml model.yaml

    等价于:

    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"
    - + \ No newline at end of file diff --git a/zh/next/cloud/billing/bills/index.html b/zh/next/cloud/billing/bills/index.html index ae8b45f13..67390b588 100644 --- a/zh/next/cloud/billing/bills/index.html +++ b/zh/next/cloud/billing/bills/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    账单明细

    账单明细查看

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击“全部账单”,可跳转查看全部账单明细。

    image

    image

    账单明细字段说明

    • 账单编号:账单的唯一标识
    • 资源:用户所使用的各类资源
    • 资源明细:使用资源运行的作业
    • 消费时间:账单开始时间至账单结束时间
    • 计费项:用户所用的产品或服务所含的具体的计费项目
    • 单价:产品或服务的单价
    • 单价单位:产品或服务单价的单位
    • 用量:产品或服务的使用量
    • 用量单位:产品或服务使用量的单位
    • 状态:账单的支付状态,分为:未结清、已结清、未结算
    - + \ No newline at end of file diff --git a/zh/next/cloud/billing/index.html b/zh/next/cloud/billing/index.html index 2b16465ba..3a0c9948b 100644 --- a/zh/next/cloud/billing/index.html +++ b/zh/next/cloud/billing/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    产品计费概述

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    计费说明

    计费项

    Starwhale 根据您选购的资源规格(CPU、GPU、内存)及使用时长进行计费。

    计费方式

    Starwhale 支持按量付费,按量付费是一种先使用后付费的计费方式。通过按量付费,您可以按资源使用量付费,不需要提前购买大量资源。

    按量付费主要按照资源计费周期计费,在每个结算周期生成账单并从账户中扣除相应费用。创建 Job 时,需要确定计算资源配置。

    请确保您在 Job 运行期间可用余额充足,如果在 Job 运行过程中,您的账户余额不足,会导致 Job 无法完成并按照已运行时长收费。

    开通要求

    按照按量付费创建 Job 前,您的 Starwhale 账户可用余额不得小于一个计费周期。

    说明:账户可用余额=充值金额+代金券金额-已消费金额-已退款金额-已冻结金额

    计费周期

    每5分钟为一个计费周期,不足5分钟则按5分钟计算,按照 Job 运行时长结算。

    计费时长

    从 Job 开始运行时计费,到 Job 运行结束后结束计费

    账单明细

    登录账户中心,点击“账户管理”,可在账户概览页面查看最近账单明细,点击“全部账单”,可跳转查看全部账单明细。详细操作流程请参见账单明细

    欠费说明

    如果账号内存在欠费账单,您无法继续使用计算资源。尽快充值结清欠费账单后可继续使用。

    查看欠费金额

    1 登录账户中心

    2 在账户概览可查看欠费金额

    退款说明

    现金余额支持退款

    需要登录账户中心,点击账户管理>充值订单,可退款的充值订单会在操作列显示退款按钮。点击可发起退款申请,详细操作流程请参见申请退款

    - + \ No newline at end of file diff --git a/zh/next/cloud/billing/recharge/index.html b/zh/next/cloud/billing/recharge/index.html index edab4c596..11b223e4d 100644 --- a/zh/next/cloud/billing/recharge/index.html +++ b/zh/next/cloud/billing/recharge/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    充值和退款

    充值渠道

    Starwhale目前支持通过微信渠道进行充值。

    充值操作步骤

    操作路径:

    1 登录账户中心,点击“去充值”,可跳转至充值页面。

    image

    2 选择或者输入充值金额,充值金额需要大于50元,同时注意支付渠道的限额(超过限额会无法支付成功)。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    充值订单

    查看充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,可在账户概览页面查看最近充值订单,点击“全部订单”,可跳转查看全部充值订单。

    image

    image

    继续支付充值订单

    如您在充值页面,点击“开始充值”后,因某些原因没有支付,可在30分钟内继续支付。

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要继续支付的订单,点击“继续支付”。

    image

    3 选择充值方式,点击“确认充值”,跳转第三方支付渠道完成付款。

    image

    取消充值订单

    操作路径:

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要取消的订单,点击“取消”,弹出确认弹窗后,点击“确定”,可取消充值订单。

    image

    - + \ No newline at end of file diff --git a/zh/next/cloud/billing/refund/index.html b/zh/next/cloud/billing/refund/index.html index 083e210e2..02d1162b7 100644 --- a/zh/next/cloud/billing/refund/index.html +++ b/zh/next/cloud/billing/refund/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:WIP

    账户退款

    申请退款

    1 登录账户中心,点击“账户管理”,点击账户管理>充值订单菜单栏进入充值订单列表。

    image

    2 找到需要退款的订单,点击“退款”,填写退款原因,确认退款金额,可申请退款。

    提示:退订款项将原路退回,即通过微信支付的订单会退回到支付使用的微信。

    image

    image

    image

    image

    - + \ No newline at end of file diff --git a/zh/next/cloud/billing/voucher/index.html b/zh/next/cloud/billing/voucher/index.html index 8cdd51744..bd18d40ac 100644 --- a/zh/next/cloud/billing/voucher/index.html +++ b/zh/next/cloud/billing/voucher/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    代金券

    什么是代金券

    代金券是starwhale以虚拟券形式给予客户的资金类权益,可用于抵扣运行时所使用资源的费用。

    如何查看我的代金券?

    登录Starwhale,进入“账户中心>代金券” 可查看代金券的编号,面值,余额,状态等信息。

    image

    点击右侧操作列“使用明细”打开“代金券使用明细页”,查看该代金券的交易时间、编号、支出等抵扣详细记录。

    image

    如何使用代金券?

    代金券适用于抵扣消费,如果您的Starwhale账户内有代金券,系统会优先抵扣代金券金额,代金券余额为0后会抵扣充值余额。

    - + \ No newline at end of file diff --git a/zh/next/cloud/index.html b/zh/next/cloud/index.html index 29d8ac682..39ce63dd5 100644 --- a/zh/next/cloud/index.html +++ b/zh/next/cloud/index.html @@ -10,13 +10,13 @@ - +
    - + \ No newline at end of file diff --git a/zh/next/community/contribute/index.html b/zh/next/community/contribute/index.html index 54df97b75..7611b75c5 100644 --- a/zh/next/community/contribute/index.html +++ b/zh/next/community/contribute/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 开源贡献指南

    参与贡献

    Starwhale 非常欢迎来自开源社区的贡献,包括但不限于以下方式:

    • 描述使用过程中的遇到的问题
    • 提交Feature Request
    • 参与Slack和Github Issues讨论
    • 参与Code Review
    • 改进文档和示例程序
    • 修复程序Bug
    • 增加Test Case
    • 改进代码的可读性
    • 开发新的Features
    • 编写Enhancement Proposal

    可以通过以下方式参与开发者社区,获取最新信息和联系Starwhale开发者:

    Starwhale社区使用Github Issues来跟踪问题和管理新特性的开发。可以选择"good first issue"或"help wanted"标签的issue,作为参与开发Starwhale的起点。

    Starwhale资源列表

    代码基本结构

    核心目录组织及功能说明如下:

    • client:swcli和Python SDK的实现,使用Python3编写,对应Starwhale Standalone Instance的所有功能。
      • api:Python SDK的接口定义和实现。
      • cli:Command Line Interface的入口点。
      • base:Python 端的一些基础抽象。
      • core:Starwhale 核心概念的实现,包括Dataset、Model、Runtime、Project、Job、Evaluation等。
      • utils:Python 端的一些工具函数。
    • console:前端的实现,使用React + TypeScript编写,对应Starwhale Cloud Instance的Web UI。
    • server:Starwhale Controller的实现,使用Java编写,对应Starwhale Cloud Instance的后端API。
    • docker:Helm Charts,绝大多数Docker Image的Dockerfile等。
    • docs:Starwhale官方文档。
    • example:示例程序,包含MNIST等例子。
    • scripts:一些Bash和Python脚本,用来进行E2E测试和软件发布等。

    Fork&Clone Starwhale仓库

    您需要fork Starwhale仓库代码并clone到本机,

    搭建针对Standalone Instance的本地开发环境

    Standalone Instance采用Python编写,当要修改Python SDK和swcli时,需要进行相应的环境搭建。

    Standalone本地开发环境前置条件

    • OS:Linux或macOS
    • Python:3.7~3.11
    • Docker:>=19.03 (非必须,当调试dockerize、生成docker image或采用docker为载体运行模型任务时需要)
    • Python隔离环境:Python venv 或 virtualenv 或 conda等都可以,用来构建一个隔离的Python环境

    从源码进行安装

    基于上一步clone到本地的仓库目录:starwhale,并进入到client子目录:

    cd starwhale/client

    使用Conda创建一个Starwhale开发环境,或者使用venv/virtualenv等创建:

    conda create -n starwhale-dev python=3.8 -y
    conda activate starwhale-dev

    安装Client包及依赖到starwhale-dev环境中:

    make install-sw
    make install-dev-req

    输入swcli --version命令,观察是否安装成功,开发环境的swcli版本是 0.0.0.dev0

    ❯ swcli --version
    swcli, version 0.0.0.dev0

    ❯ which swcli
    /home/username/anaconda3/envs/starwhale-dev/bin/swcli

    本地修改代码

    现在可以对Starwhale代码进行修改,不需要重复安装(make install-sw命令)就能在当前starwhale-dev环境中测试cli或sdk。Starwhale Repo中设置了 .editorconfig 文件,大部分IDE或代码编辑器会自动支持该文件的导入,采用统一的缩进设置。

    执行代码检查和测试

    starwhale 目录中操作,会执行单元测试、client的e2e测试、mypy检查、flake8检查和isort检查等。

    make client-all-check

    搭建针对Cloud Instance的本地开发环境

    Cloud Instance的后端采用Java编写,前端采用React+TypeScript编写,可以按需搭建相应的开发环境。

    搭建前端Console开发环境

    搭建后端Server开发环境

    • 开发语言:Java
    • 项目构建工具:Maven
    • 开发框架:Spring Boot+Mybatis
    • 测试框架:Junit5(其中mock框架为mockito,断言部分使用hamcrest,数据库、web服务等模拟使用Testcontainers)
    • 代码检查:使用maven插件 maven-checkstyle-plugin

    Server开发环境前置条件

    • OS:Linux、macOS或Windows
    • JDK: >=11
    • Docker:>=19.03
    • Maven:>=3.8.1
    • Mysql:>=8.0.29
    • Minio
    • Kubernetes cluster/Minikube(如果没有k8s集群,可以使用Minikube作为开发调试时的备选方案)

    修改代码并增加单测

    现在可以进入到相应模块,对server端的代码进行修改、调整。其中业务功能代码位置为src/main/java,单元测试目录为src/test/java。

    执行代码检查和单元测试

    cd starwhale/server
    mvn clean package

    本地部署服务

    • 前置服务

      • Minikube(可选,无k8s集群时可使用此服务,安装方式可见:Minikube

        minikube start
        minikube addons enable ingress
        minikube addons enable ingress-dns
      • Mysql

        docker run --name sw-mysql -d \
        -p 3306:3306 \
        -e MYSQL_ROOT_PASSWORD=starwhale \
        -e MYSQL_USER=starwhale \
        -e MYSQL_PASSWORD=starwhale \
        -e MYSQL_DATABASE=starwhale \
        mysql:latest
      • Minio

        docker run --name minio -d \
        -p 9000:9000 --publish 9001:9001 \
        -e MINIO_DEFAULT_BUCKETS='starwhale' \
        -e MINIO_ROOT_USER="minioadmin" \
        -e MINIO_ROOT_PASSWORD="minioadmin" \
        bitnami/minio:latest
    • 打包server程序

      若部署server端时,需要把前端同时部署上,可先执行前端部分的构建命令,然后执行'mvn clean package',则会自动将已编译好的前端文件打包进来。

      使用如下命令对程序进行打包:

      cd starwhale/server
      mvn clean package
    • 指定server启动所需的环境变量

      # Minio相关配置
      export SW_STORAGE_ENDPOINT=http://${Minio IP,默认为127.0.0.1}:9000
      export SW_STORAGE_BUCKET=${Minio bucket,默认为starwhale}
      export SW_STORAGE_ACCESSKEY=${Minio accessKey,默认为starwhale}
      export SW_STORAGE_SECRETKEY=${Minio secretKey,默认为starwhale}
      export SW_STORAGE_REGION=${Minio region,默认为local}
      # kubernetes配置
      export KUBECONFIG=${.kube配置文件所在路径}\.kube\config

      export SW_INSTANCE_URI=http://${Server服务所在机器IP}:8082
      # Mysql相关配置
      export SW_METADATA_STORAGE_IP=${Mysql IP,默认为127.0.0.1}
      export SW_METADATA_STORAGE_PORT=${Mysql port,默认为3306}
      export SW_METADATA_STORAGE_DB=${Mysql dbname,默认为starwhale}
      export SW_METADATA_STORAGE_USER=${Mysql user,默认为starwhale}
      export SW_METADATA_STORAGE_PASSWORD=${user password,默认为starwhale}
    • 部署server服务

      使用IDE或如下方式部署均可。

      java -jar controller/target/starwhale-controller-0.1.0-SNAPSHOT.jar
    • 功能调试

      这里有两种方式对修改的功能进行调试:

      • 使用swagger-ui进行接口调试,访问 /swagger-ui/index.html找到对应的api即可。
      • 或直接在ui访问,进行相应功能的调试(前提是打包时已经按说明将前端代码进行了提前构建)
    - + \ No newline at end of file diff --git a/zh/next/concepts/index.html b/zh/next/concepts/index.html index b048ea364..89fa54af2 100644 --- a/zh/next/concepts/index.html +++ b/zh/next/concepts/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/next/concepts/names/index.html b/zh/next/concepts/names/index.html index 91fa11794..033e067de 100644 --- a/zh/next/concepts/names/index.html +++ b/zh/next/concepts/names/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale中的命名规则

    下文的命名是指对Starwhale中的项目、模型、数据集、运行时以及版本标签进行命名。

    名称限制

    • 名称不区分大小写。
    • 名称必须仅由大小写字母“A-Z a-z”、数字“0-9”、连字符“-”、点“.”和下划线“_”组成。
    • 名称应始终以字母或“_”字符开头。
    • 名称的最大长度为80。

    名称唯一性要求

    • 资源名称在其所影响范围内必须是唯一的。例如,项目名称在实例中必须是唯一的,模型名称在其所在项目中必须是唯一的。
    • 同一个项目下同类资源必须使用不同的名称,包括那些已删除的资源。 例如,项目“Apple”不能有两个名为“Alice”的模型,即使其中一个已经被删除。
    • 不同种类的资源可以有相同的名称。 例如,一个项目、一个模型和一个数据集可以同时被命名为“Alice”。
    • 不同项目的资源可以具有相同的名称。 例如,“Apple”项目中的模型和“Banana”项目中的模型可以具有相同的名称“Alice”。
    • 已经被垃圾回收的资源名称可以重复使用。 例如,将项目“Apple”中名称为“Alice”的模型移除并进行垃圾回收后,该项目可以有一个新的同名模型“Alice”。
    - + \ No newline at end of file diff --git a/zh/next/concepts/project/index.html b/zh/next/concepts/project/index.html index 7ef19e05d..1a525af4b 100644 --- a/zh/next/concepts/project/index.html +++ b/zh/next/concepts/project/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale中的项目

    “项目”是组织不同资源(如模型、数据集等)的基本单位。您可以将项目用于不同的目的。例如,您可以为数据科学家团队、产品线或特定模型创建项目。用户通常在日常工作中会参与一个或多个项目。

    Starwhale Server/Cloud 项目按账号分组。Starwhale Standalone 没有帐号概念。所以您不会在 Starwhale Standalone 项目中看到任何帐号前缀。Starwhale Server/Cloud项目可以是“公共”或“私有”。公共项目意味着同一实例上的所有用户在默认情况下都自动成为该项目的“访客”角色。有关角色的更多信息,请参阅Starwhale中的角色和权限

    Starwhale Standalone会自动创建一个“self”项目并将其配置为默认项目。

    - + \ No newline at end of file diff --git a/zh/next/concepts/roles-permissions/index.html b/zh/next/concepts/roles-permissions/index.html index defa6502e..1d2e11298 100644 --- a/zh/next/concepts/roles-permissions/index.html +++ b/zh/next/concepts/roles-permissions/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale中的角色和权限

    角色用于为用户分配权限。只有Starwhale Server/Cloud有角色和权限,Starwhale Standalone没有相应概念。系统会自动创建一个管理员角色并分配给默认用户“starwhale”。一些敏感操作只能由具有管理员角色的用户执行,例如在Starwhale Server中创建新的账号。

    每个项目具有三类角色:

    • 管理员Admin - 项目管理员可以读写项目数据并将项目角色分配给用户。
    • 维护者Maintainer - 项目维护者可以读写项目数据。
    • 访客Guest - 项目访客只能读取项目数据。
    动作管理员Admin维护者Maintainer访客Guest
    管理项目成员
    编辑项目
    查看项目
    创建评价
    删除评价
    查看评价
    创建数据集
    更新数据集
    删除数据集
    查看数据集
    创建模型
    更新模型
    删除模型
    查看模型
    创建运行时
    更新运行时
    删除运行时
    查看运行时

    创建项目的用户成为第一个项目管理员。他可以在这之后将角色分配给其他用户。

    - + \ No newline at end of file diff --git a/zh/next/concepts/versioning/index.html b/zh/next/concepts/versioning/index.html index 8bad9b6fd..94a97afdb 100644 --- a/zh/next/concepts/versioning/index.html +++ b/zh/next/concepts/versioning/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale中的资源版本控制

    • Starwhale管理所有模型、数据集和运行时的历史记录。对特定资源的每次更新都会附加一个新版本的历史记录。
    • 版本由version id标识。version id是由 Starwhale自动生成的随机字符串,并按其创建时间排序。
    • 版本可以有标签。Starwhale使用版本标签来提供人性化的版本表示。默认情况下,Starwhale会为每个版本附加一个默认标签。默认标记是字母“v”后跟一个数字。对于每个版本化的资源,第一个版本标签始终标记为“v0”,第二个版本标记为“v1”,依此类推。有一个特殊的标签“latest”总是指向最新的版本。删除版本后,将不会重复使用其默认标签。例如,有一个带有标签“v0、v1、v2”的模型。 删除“v2”后,标签将为“v0、v1”。 接下来一个标签将是“v3”而不是“v2”。您可以将自己定义的标签附加到任何版本并随时删除它们。
    • Starwhale使用线性历史,不提供分支。
    • Starwhale资源无法真正回滚。当要恢复某个历史版本时,Starwhale会复制该版本数据并将其作为新版本追加到历史记录的末尾。您可以手动删除和恢复历史版本。
    - + \ No newline at end of file diff --git a/zh/next/dataset/index.html b/zh/next/dataset/index.html index 084d800c6..f64624918 100644 --- a/zh/next/dataset/index.html +++ b/zh/next/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 数据集

    overview

    设计概述

    Starwhale Dataset 定位

    Starwhale Dataset 包含数据构建、数据加载和数据可视化三个核心阶段,是一款面向ML/DL领域的数据管理工具。Starwhale Dataset 能直接使用 Starwhale Runtime 构建的环境,能被 Starwhale ModelStarwhale Evaluation 无缝集成,是 Starwhale MLOps 工具链的重要组成部分。

    根据 Machine Learning Operations (MLOps): Overview, Definition, and Architecture 对MLOps Roles的分类,Starwhale Dataset的三个阶段针对用户群体如下:

    • 数据构建:Data Engineer、Data Scientist
    • 数据加载:Data Scientist、ML Developer
    • 数据可视化:Data Engineer、Data Scientist、ML Developer

    mlops-users

    核心功能

    • 高效加载:数据集原始文件存储在 OSS 或 NAS 等外部存储上,使用时按需加载,不需要数据落盘。
    • 简单构建:既支持从 Image/Video/Audio 目录、json文件和 Huggingface 数据集等来源一键构建数据集,又支持编写 Python 代码构建完全自定义的数据集。
    • 版本管理:可以进行版本追踪、数据追加等操作,并通过内部抽象的 ObjectStore,避免数据重复存储。
    • 数据集分发:通过 swcli dataset copy 命令,实现 Standalone 实例和 Cloud/Server 实例的双向数据集分享。
    • 数据可视化:Cloud/Server 实例的 Web 界面中可以对数据集提供多维度、多类型的数据呈现。
    • 制品存储:Standalone 实例能存储本地构建或分发的 swds 系列文件,Cloud/Server 实例使用对象存储提供集中式的 swds 制品存储。
    • Starwhale无缝集成Starwhale Dataset 能使用 Starwhale Runtime 构建的运行环境构建数据集。Starwhale EvaluationStarwhale Model 直接通过 --dataset 参数指定数据集,就能完成自动数据加载,便于进行推理、模型评测等环境。

    关键元素

    • swds 虚拟包文件:swdsswmpswrt 不一样,不是一个打包的单一文件,而是一个虚拟的概念,具体指的是一个目录,是 Starwhale 数据集某个版本包含的数据集相关的文件,包括 _manifest.yaml, dataset.yaml, 数据集构建的Python脚本和数据文件的链接等。可以通过 swcli dataset info 命令查看swds所在目录。swds 是Starwhale Dataset 的简写。

    swds-tree.png

    • swcli dataset 命令行:一组dataset相关的命令,包括构建、分发和管理等功能,具体说明参考CLI Reference
    • dataset.yaml 配置文件:描述数据集的构建过程,可以完全省略,通过 swcli dataset build 参数指定,可以认为 dataset.yaml 是build命令行参数的一种配置文件表示方式。swcli dataset build 参数优先级高于 dataset.yaml
    • Dataset Python SDK:包括数据构建、数据加载和若干预定义的数据类型,具体说明参考Python SDK
    • 数据集构建的 Python 脚本:使用 Starwhale Python SDK 编写的用来构建数据集的一系列脚本。

    最佳实践

    Starwhale Dataset 的构建是独立进行的,如果编写构建脚本时需要引入第三方库,那么使用 Starwhale Runtime 可以简化 Python 的依赖管理,能保证数据集的构建可复现。Starwhale 平台会尽可能多的内建开源数据集,让用户 copy 下来数据集后能立即使用。

    命令行分组

    Starwhale Dataset 命令行从使用阶段的角度上,可以划分如下:

    • 构建阶段
      • swcli dataset build
    • 可视化阶段
      • swcli dataset diff
      • swcli dataset head
    • 分发阶段
      • swcli dataset copy
    • 基本管理
      • swcli dataset tag
      • swcli dataset info
      • swcli dataset history
      • swcli dataset list
      • swcli dataset summary
      • swcli dataset remove
      • swcli dataset recover

    Starwhale Dataset Viewer

    目前 Cloud/Server 实例中 Web UI 可以对数据集进行可视化展示,目前只有使用 Python SDK 的DataType 才能被前端正确的解释,映射关系如下:

    • Image:展示缩略图、放大图、MASK类型图片,支持 image/pngimage/jpegimage/webpimage/svg+xmlimage/gifimage/apngimage/avif 格式。
    • Audio:展示为音频wave图,可播放,支持 audio/mp3audio/wav 格式。
    • Video:展示为视频,可播放,支持 video/mp4video/avivideo/webm 格式。
    • GrayscaleImage:展示灰度图,支持 x/grayscale 格式。
    • Text:展示文本,支持 text/plain 格式,可以设置编码格式,默认为utf-8。
    • Binary和Bytes:暂不支持展示。
    • Link:上述几种多媒体类型都支持指定link作为存储路径。

    Starwhale Dataset 数据格式

    数据集由多个行组成,每一行称为一个样本,每个样本包含若干 features ,features 是一个类 dict 结构,对key和value有一些简单的限制[L]:

    • dict的key必须为str类型。
    • dict的value必须是 int/float/bool/str/bytes/dict/list/tuple 等 Python 的基本类型,或者 Starwhale 内置的数据类型
    • 不同样本的数据相同key的value,不需要保持同一类型。
    • 如果value是list或者tuple,其元素的数据类型必须一致。
    • value为dict时,其限制等同于限制[L]

    例子:

    {
    "img": GrayscaleImage(
    link=Link(
    "123",
    offset=32,
    size=784,
    _swds_bin_offset=0,
    _swds_bin_size=8160,
    )
    ),
    "label": 0,
    }

    文件类数据的处理方式

    Starwhale Dataset 对文件类型的数据进行了特殊处理,如果您不关心 Starwhale 的实现方式,可以忽略本小节。

    根据实际使用场景,Starwhale Dataset 对基类为 starwhale.BaseArtifact 的文件类数据有两种处理方式:

    • swds-bin: Starwhale 以自己的二进制格式 (swds-bin) 将数据合并成若干个大文件,能高效的进行索引、切片和加载。
    • remote-link: 满足用户的原始数据存放在某些外部存储上,比如 OSS 或 NAS 等,原始数据较多,不方便搬迁或者已经用一些内部的数据集实现进行封装过,那么只需要在数据中使用 link,就能建立索引。

    在同一个Starwhale 数据集中,可以同时包含两种类型的数据。

    - + \ No newline at end of file diff --git a/zh/next/dataset/yaml/index.html b/zh/next/dataset/yaml/index.html index 04f7797be..943d7e3ff 100644 --- a/zh/next/dataset/yaml/index.html +++ b/zh/next/dataset/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    dataset.yaml 使用指南

    提示

    dataset.yaml 对于 swcli dataset build 构建数据集的过程是非必要的。

    Starwhale Dataset 构建的时候使用 dataset.yaml,若省略 dataset.yaml,则可以在 swcli dataset build 命令行参数中描述相关配置,可以认为 dataset.yaml 是 build 命令行的配置文件化表述。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Dataset的名字String
    handler为一个函数,返回一个Generator或一个可迭代的对象或一个实现 __iter__ 方法的类,格式为 {module 路径}:{类名|函数名}String
    desc数据集描述信息String""
    versiondataset.yaml格式版本,目前仅支持填写 1.0String1.0
    attr数据集构建参数Dict
    attr.volume_sizeswds-bin格式的数据集每个data文件的大小。当写数字时,单位bytes;也可以是数字+单位格式,如64M, 1GB等Int或Str64MB
    attr.alignment_sizeswds-bin格式的数据集每个数据块的数据alignment大小,如果设置alignment_size为4k,数据块大小为7.9K,则会补齐0.1K的空数据,让数据块为alignment_size的整数倍,提升page size等读取效率Integer或String128

    使用示例

    最简示例

    name: helloworld
    handler: dataset:ExampleProcessExecutor

    helloworld的数据集,使用dataset.yaml所在目录中dataset.py文件中的 ExampleProcessExecutor 类进行数据构建。

    MNIST数据集构建示例

    name: mnist
    handler: mnist.dataset:DatasetProcessExecutor

    desc: MNIST data and label test dataset

    attr:
    alignment_size: 1k
    volume_size: 4M

    handler为generator function的例子

    dataset.yaml 内容:

    name: helloworld
    handler: dataset:iter_item

    dataset.py 内容:

    def iter_item():
    for i in range(10):
    yield {"img": f"image-{i}".encode(), "label": i}

    本例中,handler为一个generator function,Starwhale SDK根据首个yield出来的元素为非Starwhale.Link类型,等同于继承 starwhale.SWDSBinBuildExecutor 类。

    - + \ No newline at end of file diff --git a/zh/next/evaluation/heterogeneous/node-able/index.html b/zh/next/evaluation/heterogeneous/node-able/index.html index ea6fcc8a9..b797e1f95 100644 --- a/zh/next/evaluation/heterogeneous/node-able/index.html +++ b/zh/next/evaluation/heterogeneous/node-able/index.html @@ -10,7 +10,7 @@ - + @@ -24,7 +24,7 @@ 参考 链接

    v0.13.0-rc.1 为例

    kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0-rc.1/nvidia-device-plugin.yml

    注意: 此操作会在所有的 K8s 节点中运行 NVIDIA 的 device plugin 插件, 如果之前配置过, 则会被更新, 请谨慎评估使用的镜像版本

  • 确认 GPU 可以在集群中发现和使用 参考下边命令, 查看 Jetson 节点的 Capacity 中有 nvidia.com/gpu, GPU 即被 K8s 集群正常识别

    # kubectl describe node orin | grep -A15 Capacity
    Capacity:
    cpu: 12
    ephemeral-storage: 59549612Ki
    hugepages-1Gi: 0
    hugepages-2Mi: 0
    hugepages-32Mi: 0
    hugepages-64Ki: 0
    memory: 31357608Ki
    nvidia.com/gpu: 1
    pods: 110
  • 制作和使用自定义镜像

    文章前面提到的 l4t-jetpack 镜像可以满足我们一般的使用, 如果我们需要自己定制更加精简或者更多功能的镜像, 可以基于 l4t-base 来制作 相关 Dockerfile 可以参考 Starwhale为mnist制作的镜像

    - + \ No newline at end of file diff --git a/zh/next/evaluation/heterogeneous/virtual-node/index.html b/zh/next/evaluation/heterogeneous/virtual-node/index.html index dfe9f78b2..5cef8a49e 100644 --- a/zh/next/evaluation/heterogeneous/virtual-node/index.html +++ b/zh/next/evaluation/heterogeneous/virtual-node/index.html @@ -10,7 +10,7 @@ - + @@ -19,7 +19,7 @@ 此方案被各云厂商广泛用于 serverless 容器集群方案, 比如阿里云的 ASK, Amazon 的 AWS Fargate 等.

    原理

    virtual kubelet 框架将 kubelet 对于 Node 的相关接口进行实现, 只需要简单的配置即可模拟一个节点. 我们只需要实现 PodLifecycleHandler 接口即可支持:

    • 创建, 更新, 删除 Pod
    • 获取 Pod 状态
    • 获取 Container 日志

    将设备加入集群

    如果我们的设备由于资源限制等情况无法作为 K8s 的一个节点进行服务, 那么我们可以通过使用 virtual kubelet 模拟一个代理节点的方式对这些设备进行管理, Starwhale Controller 和设备的控制流如下


    ┌──────────────────────┐ ┌────────────────┐ ┌─────────────────┐ ┌────────────┐
    │ Starwhale Controller ├─────►│ K8s API Server ├────►│ virtual kubelet ├────►│ Our device │
    └──────────────────────┘ └────────────────┘ └─────────────────┘ └────────────┘

    virtual kubelet 将 Starwhale Controller 下发下来的 Pod 编排信息转化为对设备的控制行为, 比如 ssh 到设备上执行一段命令, 或者通过 USB 或者串口发送一段消息等.

    下面是使用 virtual kubelet 的方式来对一个未加入集群的可以 ssh 的设备进行控制的示例

    1. 准备证书
    • 创建文件 csr.conf, 内容如下
    [req]
    req_extensions = v3_req
    distinguished_name = req_distinguished_name
    [req_distinguished_name]
    [v3_req]
    basicConstraints = CA:FALSE
    keyUsage = digitalSignature, keyEncipherment
    extendedKeyUsage = serverAuth
    subjectAltName = @alt_names
    [alt_names]
    IP = 1.2.3.4
    • 生成证书
    openssl genrsa -out vklet-key.pem 2048
    openssl req -new -key vklet-key.pem -out vklet.csr -subj '/CN=system:node:1.2.3.4;/C=US/O=system:nodes' -config ./csr.conf
    • 提交证书
    cat vklet.csr| base64 | tr -d "\n" # 输出内容作为 csr.yaml 文件中 spec.request 的内容

    csr.yaml

    apiVersion: certificates.k8s.io/v1
    kind: CertificateSigningRequest
    metadata:
    name: vklet
    spec:
    request: ******************************************************
    signerName: kubernetes.io/kube-apiserver-client
    expirationSeconds: 1086400
    usages:
    - client auth
     kubectl apply -f csr.yaml
    kubectl certificate approve vklet
    kubectl get csr vklet -o jsonpath='{.status.certificate}'| base64 -d > vklet-cert.pem

    现在我们得到了 vklet-cert.pem

    • 编译 virtual kubelet
    git clone https://github.com/virtual-kubelet/virtual-kubelet
    cd virtual-kubelet && make build

    创建节点的配置文件 mock.json

    {
    "virtual-kubelet":
    {
    "cpu": "100",
    "memory": "100Gi",
    "pods": "100"
    }
    }

    启动 virtual kubelet

    export APISERVER_CERT_LOCATION=/path/to/vklet-cert.pem
    export APISERVER_KEY_LOCATION=/path/to/vklet-key.pem
    export KUBECONFIG=/path/to/kubeconfig

    virtual-kubelet --provider mock --provider-config /path/to/mock.json

    至此, 我们使用 virtual kubelet 模拟了一个 100 core + 100G 内存的节点.

    • 增加 PodLifecycleHandler 的实现, 将 Pod 编排中的重要信息转化为 ssh 命令执行, 并且收集日志待 Starwhale Controller 收集

    具体实现可参考 ssh executor

    - + \ No newline at end of file diff --git a/zh/next/evaluation/index.html b/zh/next/evaluation/index.html index 284540108..e3074a1fe 100644 --- a/zh/next/evaluation/index.html +++ b/zh/next/evaluation/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 模型评测

    设计概述

    Starwhale Evaluation 定位

    Starwhale Evaluation 目标是对模型评测进行全流程管理,包括创建 Job、分发 Task、查看模型评测报告和基本管理等。Starwhale Evaluation 是 Starwhale构建的 MLOps 工具链使用 Starwhale ModelStarwhale DatasetStarwhale Runtime 三个基础元素,在模型评测这个场景上的具体应用,后续还会包含 Starwhale Model ServingStarwhale Training 等应用场景。

    核心功能

    • 可视化展示swcli和 Web UI都提供对模型评测结果的可视化展示,支持多个结果的对比等功能,同时用户可以自定义记录评测中间过程。
    • 多场景适配:不管是在笔记本的单机环境,还是在分布式服务器集群环境,都能使用统一的命令、Python脚本、制品和操作方法进行模型评测,满足不同算力、不同数据量的外部环境要求。
    • Starwhale无缝集成:使用Starwhale Runtime提供的运行环境,将 Starwhale Dataset 作为数据输入,在 Starwhale Model 中运行模型评测任务,不管是在 swcli、Python SDK 还是 Cloud/Server 实例 Web UI中,都能简单的进行配置。

    关键元素

    • swcli model run 命令行: 能够完成模型的批量、离线式评测。
    • swcli model serve 命令行: 能够完成模型的在线评测。

    最佳实践

    命令行分组

    从完成 Starwhale Evaluation 全流程任务的角度,可以将所涉及的命令分组如下:

    • 基础准备阶段
      • swcli dataset build 或 Starwhale Dataset Python SDK
      • swcli model build 或 Starwhale Model Python SDK
      • swcli runtime build
    • 评测阶段
      • swcli model run
      • swcli model serve
    • 结果展示阶段
      • swcli job info
    • 基本管理
      • swcli job list
      • swcli job remove
      • swcli job recover

    job-step-task 抽象

    • job: 一次模型评测任务就是一个 job,一个 job 包含一个或多个 step
    • step: step 对应评测过程中的某个阶段。使用PipelineHandler的默认评测过程,step就是predictevaluate;用户自定义的评测过程,step 就是使用 @handler, @evaluation.predict, @evaluation.evaluate 修饰的函数。step 之间可以有依赖关系,形成一个DAG。一个 step 包含一个或多个 task。同一 step 中的不同 task,执行逻辑是一致的,只是输入参数不同,常见做法是将数据集分割成若干部分,然后传入每个task 中,task 可以并行执行。
    • task: task 是最终运行的实体。在 Cloud/Server 实例中,一个 task 就是一个Pod的container; 在Standalone 实例中,一个 task 就是一个 Python Thread。

    job-step-task 的抽象是实现 Starwhale Evaluation 分布式运行的基础。

    - + \ No newline at end of file diff --git a/zh/next/faq/index.html b/zh/next/faq/index.html index ff7ce30e0..f76723ba8 100644 --- a/zh/next/faq/index.html +++ b/zh/next/faq/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    常见问题

    Starwhale Model 拷贝到 Server 时遇到 "413 Client Error: Request Entity Too Large for url" 错误

    • 原因:Ingress 设置的 proxy-body-size(Nginx默认为1MB)小于实际上传文件的大小。
    • 解决方法:请检查 Starwhale Server 的 Ingress 设置,增加 nginx.ingress.kubernetes.io/proxy-body-size: 30g 到 annotations 字段中。

    Starwhale Server 向 Kubernetes 集群中提交任务提示 RBAC 授权错误

    Kubernetes 集群启动了 RBAC,启动 Starwhale Server 的服务账号权限不足,至少需要如下权限:

    ResourceAPI GroupGetListWatchCreateDelete
    jobsbatchYYYYY
    podscoreYYY
    nodescoreYYY
    events""Y

    参考yaml例子:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale
    - + \ No newline at end of file diff --git a/zh/next/getting-started/cloud/index.html b/zh/next/getting-started/cloud/index.html index 21599ad0c..1e3f909a2 100644 --- a/zh/next/getting-started/cloud/index.html +++ b/zh/next/getting-started/cloud/index.html @@ -10,13 +10,13 @@ - +
    -
    版本:WIP

    Starwhale Cloud入门指南

    Starwhale Cloud运行在阿里云上,域名是 https://cloud.starwhale.cn ,后续我们会推出部署在AWS上的 https://cloud.starwhale.ai 服务,需要注意的是,这是两个相互独立的实例,帐户和数据不共享。您可以选择任何一个开始。

    在开始之前,您需要先安装Starwhale Client(swcli)

    注册Starwhale Cloud并创建您的第一个项目

    您可以直接使用自己的GitHub或微信帐号登录,也可以注册一个新的帐号。如果您使用 GitHub 或 微信帐号登录,系统会要求您提供用户名。

    然后您可以创建一个新项目。在本教程中,我们将使用名称 demo 作为项目名称。

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    登录云实例

    swcli instance login --username <您的用户名> --password <您的密码> --alias swcloud https://cloud.starwhale.cn

    将数据集、模型和运行时复制到Starwhale Cloud

    swcli model copy mnist swcloud/project/demo
    swcli dataset copy mnist swcloud/project/demo
    swcli runtime copy pytorch swcloud/project/demo

    使用 Web UI 运行评估

    console-create-job.gif

    恭喜! 您已完成Starwhale Cloud的入门指南。

    - +
    版本:WIP

    Starwhale Cloud入门指南

    Starwhale Cloud运行在阿里云上,域名是 https://cloud.starwhale.cn ,后续我们会推出部署在AWS上的 https://cloud.starwhale.ai 服务,需要注意的是,这是两个相互独立的实例,帐户和数据不共享。您可以选择任何一个开始。

    在开始之前,您需要先安装Starwhale Client(swcli)

    注册Starwhale Cloud并创建您的第一个项目

    您可以直接使用自己的GitHub或微信帐号登录,也可以注册一个新的帐号。如果您使用 GitHub 或 微信帐号登录,系统会要求您提供用户名。

    然后您可以创建一个新项目。在本教程中,我们将使用名称 demo 作为项目名称。

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为helloworld的Starwhale模型
    • 一个名为mnist64的Starwhale数据集
    • 一个名为helloworld的Starwhale运行时

    登录云实例

    swcli instance login --username <您的用户名> --password <您的密码> --alias swcloud https://cloud.starwhale.cn

    将数据集、模型和运行时复制到Starwhale Cloud

    swcli model copy helloworld swcloud/project/demo
    swcli dataset copy mnist64 swcloud/project/demo
    swcli runtime copy helloworld swcloud/project/demo

    使用 Web UI 运行评估

    console-create-job.gif

    恭喜! 您已完成Starwhale Cloud的入门指南。

    + \ No newline at end of file diff --git a/zh/next/getting-started/index.html b/zh/next/getting-started/index.html index 8f7af87f1..204e7fc79 100644 --- a/zh/next/getting-started/index.html +++ b/zh/next/getting-started/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    入门指南

    首先,您需要安装Starwhale Client(swcli),可以运行如下命令:

    python3 -m pip install starwhale

    更多详细信息请参阅swcli安装指南

    根据您使用的实例类型,您可以参考以下三个入门指南:

    • Starwhale Standalone入门指南 - 本指南可帮助您在台式PC/笔记本电脑上运行一个MNIST评估。这是开始使用Starwhale最快最简单的方法。
    • Starwhale Server入门指南 - 本指南可帮助您在私有服务器上安装Starwhale Server并运行一个MNIST评估。在本指南结束时,您将拥有一个Starwhale Server实例,您可以在其中管理您的数据集和模型。
    • Starwhale Cloud入门指南 - 本指南可帮助您在Starwhale Cloud上创建帐户并运行MNIST评估。这是体验所有Starwhale功能的最简单方法。
    - + \ No newline at end of file diff --git a/zh/next/getting-started/runtime/index.html b/zh/next/getting-started/runtime/index.html index 7ffaf77be..73383d5ae 100644 --- a/zh/next/getting-started/runtime/index.html +++ b/zh/next/getting-started/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale Runtime入门指南

    本文演示如何搭建Pytorch环境的Starwhale Runtime以及如何在不同环境中使用它。该runtime可以满足Starwhale中六个例子的依赖需求:mnist、speech commands、nmt、cifar10、ag_news、PennFudan。相关代码链接:example/runtime/pytorch

    您可以从本教程中学到以下内容:

    • 如何构建Starwhale Runtime。
    • 如何在不同场景下使用Starwhale Runtime。
    • 如何发布Starwhale Runtime。

    前置条件

    基础环境

    运行以下命令以克隆示例代码:

    git clone https://github.com/star-whale/starwhale.git
    cd starwhale/example/runtime/pytorch-cn-mirror #非中国大陆网络可使用pytorch例子

    构建Starwhale Runtime

    ❯ swcli -vvv runtime build --yaml runtime.yaml

    在Standalone Instance中使用Starwhale Runtime

    在shell中使用Starwhale Runtime

    # 激活runtime
    swcli runtime activate pytorch-cn-mirror

    swcli runtime activate会下载runtime的所有python依赖,并在当前shell环境中激活该环境。这个过程可能需要很长时间。

    当runtime被激活时,所有依赖项都已在您的python环境中准备就绪,类似于virtualenv的source venv/bin/activate或者conda的conda activate命令。如果您关闭了shell或切换到另一个shell,则下次使用之前需要重新激活这个runtime。

    在swcli中使用Starwhale Runtime

    # 模型构建中使用runtime
    swcli model build . --runtime pytorch-cn-mirror
    # 数据集构建中使用runtime
    swcli dataset build . --runtime pytorch-cn-mirror
    # 模型评测中使用runtime
    swcli model run --uri mnist/version/v0 --dataset mnist --runtime pytorch-cn-mirror

    将 Starwhale Runtime 复制到另一个实例

    您可以将运行时复制到Server/Cloud实例,然后可以在Server/Cloud实例中使用或由其他用户下载。

    # 将runtime复制到名为“pre-k8s”的Server实例
    ❯ swcli runtime copy pytorch-cn-mirror cloud://pre-k8s/project/starwhale
    - + \ No newline at end of file diff --git a/zh/next/getting-started/server/index.html b/zh/next/getting-started/server/index.html index 6101c5954..cbf74a5fc 100644 --- a/zh/next/getting-started/server/index.html +++ b/zh/next/getting-started/server/index.html @@ -10,13 +10,13 @@ - +
    -
    版本:WIP

    Starwhale Server入门指南

    安装Starwhale Server

    安装 Starwhale Server,参见安装指南

    创建您的第一个项目

    登录服务器

    打开浏览器并在地址栏中输入服务器的 URL。 使用默认用户名(starwhale)和密码(abcd1234)登录。

    console-artifacts.gif

    创建一个新项目

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为mnist的Starwhale模型
    • 一个名为mnist的Starwhale数据集
    • 一个名为pytorch的Starwhale运行时

    将数据集、模型和运行时复制到Starwhale Server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy mnist server/project/demo
    swcli dataset copy mnist server/project/demo
    swcli runtime copy pytorch server/project/demo

    使用Web UI运行模型评估

    使用浏览器打开“demo”项目并创建一个新的评估。

    console-create-job.gif

    恭喜! 您已完成Starwhale Server的入门指南。

    - +
    版本:WIP

    Starwhale Server入门指南

    安装Starwhale Server

    安装 Starwhale Server,参见安装指南

    创建您的第一个项目

    登录服务器

    打开浏览器并在地址栏中输入服务器的 URL。 使用默认用户名(starwhale)和密码(abcd1234)登录。

    console-artifacts.gif

    创建一个新项目

    在本地机器上构建数据集、模型和运行时

    按照Starwhale Standalone入门指南中的步骤1到步骤4在本地机器上创建:

    • 一个名为helloworld的Starwhale模型
    • 一个名为mnist64的Starwhale数据集
    • 一个名为helloworld的Starwhale运行时

    将数据集、模型和运行时复制到Starwhale Server

    swcli instance login --username <your username> --password <your password> --alias server <Your Server URL>

    swcli model copy helloworld server/project/demo
    swcli dataset copy mnist64 server/project/demo
    swcli runtime copy helloworld server/project/demo

    使用Web UI运行模型评估

    使用浏览器打开“demo”项目并创建一个新的评估。

    console-create-job.gif

    恭喜! 您已完成Starwhale Server的入门指南。

    + \ No newline at end of file diff --git a/zh/next/getting-started/standalone/index.html b/zh/next/getting-started/standalone/index.html index f01cfb43b..982967d76 100644 --- a/zh/next/getting-started/standalone/index.html +++ b/zh/next/getting-started/standalone/index.html @@ -10,13 +10,13 @@ - +
    -
    版本:WIP

    Starwhale Standalone入门指南

    Starwhale Client(swcli)安装完成后,您就可以使用Starwhale Standalone。

    我们也提供对应的Jupyter Notebook例子,可以在 Google Colab 或本地的 vscode/jupyterlab 中试用。

    下载例子

    通过以下方式克隆Starwhale项目来下载Starwhale示例:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    为了节省例子的下载时间,我们执行git clone命令时,忽略了git-lfs,并只保留最近一次的commit信息。我们选用ML/DL领域的HelloWorld程序-MNIST来介绍如何从零开始构建数据集、模型包和运行环境,并最终完成模型评测。接下来的操作都在 starwhale 目录中进行。

    核心工作流程

    构建 Pytorch 运行时

    运行时示例代码位于example/runtime/pytorch目录中。

    • 构建Starwhale运行时包:

      swcli runtime build --yaml example/runtime/pytorch/runtime.yaml
      提示

      当首次构建Starwhale Runtime时,由于需要创建venv或conda隔离环境,并下载相关的Python依赖,命令执行需要花费一段时间。时间长短取决于所在机器的网络情况和runtime.yaml中Python依赖的数量。建议合理设置机器的 ~/.pip/pip.conf 文件,填写缓存路径和适合当前网络环境的pypi mirror地址。

      处于中国大陆网络环境中的用户,可以参考如下配置:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • 检查您本地的Starwhale运行时:

      swcli runtime list
      swcli runtime info pytorch

    构建模型

    模型示例代码位于 example/mnist 目录中。

    • 下载预训练模型文件:

      cd example/mnist
      CN=1 make download-model
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建一个Starwhale模型:

      swcli model build example/mnist --runtime pytorch
    • 检查您本地的Starwhale模型:

      swcli model list
      swcli model info mnist

    构建数据集

    数据集示例代码位于 example/mnist 目录中。

    • 下载MNIST原始数据:

      cd example/mnist
      CN=1 make download-data
      # 非中国大陆网络用户,可以省略 CN=1 环境变量
      cd -
    • 构建Starwhale数据集:

      swcli dataset build --yaml example/mnist/dataset.yaml
    • 检查您本地的Starwhale数据集:

      swcli dataset list
      swcli dataset info mnist
      swcli dataset head mnist

    运行评估作业

    • 创建评估工作

      swcli -vvv model run --uri mnist --dataset mnist --runtime pytorch
    • 检查评估结果

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    恭喜! 您已完成Starwhale Standalone的入门指南。

    - +
    版本:WIP

    Starwhale Standalone入门指南

    Starwhale Client(swcli)安装完成后,您就可以使用Starwhale Standalone。

    我们也提供对应的Jupyter Notebook例子,可以在 Google Colab 或本地的 vscode/jupyterlab 中试用。

    下载例子

    通过以下方式克隆Starwhale项目来下载Starwhale示例:

    GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/star-whale/starwhale.git --depth 1
    cd starwhale

    为了节省例子的下载时间,我们执行git clone命令时,忽略了git-lfs,并只保留最近一次的commit信息。我们选用ML/DL领域的HelloWorld程序-MNIST来介绍如何从零开始构建数据集、模型包和运行环境,并最终完成模型评测。接下来的操作都在 starwhale 目录中进行。

    核心工作流程

    构建 Starwhale 运行时

    运行时示例代码位于example/helloworld目录中。

    • 构建Starwhale运行时包:

      swcli -vvv runtime build --yaml example/helloworld/runtime.yaml
      提示

      当首次构建Starwhale Runtime时,由于需要创建venv或conda隔离环境,并下载相关的Python依赖,命令执行需要花费一段时间。时间长短取决于所在机器的网络情况和runtime.yaml中Python依赖的数量。建议合理设置机器的 ~/.pip/pip.conf 文件,填写缓存路径和适合当前网络环境的pypi mirror地址。

      处于中国大陆网络环境中的用户,可以参考如下配置:

      [global]
      cache-dir = ~/.cache/pip
      index-url = https://pypi.tuna.tsinghua.edu.cn/simple
      extra-index-url = https://mirrors.aliyun.com/pypi/simple/
    • 检查您本地的Starwhale运行时:

      swcli runtime list
      swcli runtime info helloworld

    构建模型

    模型示例代码位于 example/helloworld 目录中。

    • 构建一个Starwhale模型:

      swcli -vvv model build example/helloworld --name helloworld -m evaluation --runtime helloworld
    • 检查您本地的Starwhale模型:

      swcli model list
      swcli model info helloworld

    构建数据集

    数据集示例代码位于 example/helloworld 目录中。

    • 构建Starwhale数据集:

      swcli runtime activate helloworld
      python3 example/helloworld/dataset.py
      deactivate
    • 检查您本地的Starwhale数据集:

      swcli dataset list
      swcli dataset info mnist64
      swcli dataset head mnist64

    运行评估作业

    • 创建评估工作

      swcli -vvv model run --uri helloworld --dataset mnist64 --runtime helloworld
    • 检查评估结果

      swcli job list
      swcli job info $(swcli job list | grep mnist | grep success | awk '{print $1}' | head -n 1)

    恭喜! 您已完成Starwhale Standalone的入门指南。

    + \ No newline at end of file diff --git a/zh/next/index.html b/zh/next/index.html index aae0c856a..853adbfa5 100644 --- a/zh/next/index.html +++ b/zh/next/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    什么是Starwhale

    概述

    Starwhale是一个 MLOps/LLMOps平台,能够让您的模型创建、评估和发布流程变得更加轻松。它旨在为数据科学家和机器学习工程师创建一个方便的工具。

    Starwhale能够帮助您:

    • 跟踪您的训练/测试数据集历史记录,包括所有数据项及其相关标签,以便您轻松访问它们。
    • 管理您可以在团队中共享的模型包。
    • 在不同的环境中运行您的模型,无论是在 Nvidia GPU服务器上还是在嵌入式设备(如 Cherry Pi)上。
    • 为您的模型快速创建配备交互式 Web UI的在线服务。

    同时,Starwhale 是一个开放的平台,您可以创建插件来满足自己的需求。

    部署选项

    Starwhale的每个部署称为一个实例。所有实例都可以通过Starwhale Client(swcli)进行管理。

    您可以任选以下实例类型之一开始使用:

    • Starwhale Standalone - Starwhale Standalone 本质上是一套存储在本地文件系统中的数据库。它由 Starwhale Client(swcli)创建和管理。您只需安装 swcli 即可使用。目前,一台机器上的每个用户只能拥有一个Starwhale Standalone 实例。我们建议您使用 Starwhale Standalone 来构建和测试您的数据集和模型,然后再将它们推送到 Starwhale Server/Cloud 实例。
    • Starwhale Server - Starwhale Server 是部署在您本地服务器上的服务。除了 Starwhale Client(swcli)的文本交互界面,Starwhale Server还提供 Web UI供您管理数据集和模型,以及在Kubernetes集群中运行模型并查看运行结果。
    • Starwhale Cloud - Starwhale Cloud 是托管在公共云上的服务。 通过在https://cloud.starwhale.cn注册一个账号,您就可以使用Starwhale,而无需安装、运行和维护您自己的实例。 Starwhale Cloud 还提供公共资源供您下载,例如一些流行的开源数据集、模型和运行时。查看 Starwhale Cloud 实例上的 “starwhale/public”项目以获取更多详细信息。

    在您决定要使用的实例类型时,请考虑以下因素:

    实例类型部署位置维护者用户界面可扩展性
    Starwhale Standalone您的笔记本电脑或本地服务器不需要命令行不可扩展
    Starwhale Server您的数据中心您自己Web UI和命令行可扩展,取决于您的 Kubernetes 集群
    Starwhale Cloud公共云,如AWS或阿里云Starwhale团队Web UI和命令行可扩展,但目前受到云上免费可用资源的限制
    - + \ No newline at end of file diff --git a/zh/next/model/index.html b/zh/next/model/index.html index 8c25b51ac..78d2fba4b 100644 --- a/zh/next/model/index.html +++ b/zh/next/model/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 模型

    overview

    Starwhale 模型是一种机器学习模型的标准包格式,可用于多种用途,例如模型微调、模型评估和在线服务。 Starwhale 模型包含模型文件、推理代码、配置文件等等。

    创建一个 Starwhale 模型

    创建 Starwhale 模型有两种方法:通过 swcli 或通过 SDK

    使用 swcli 创建 Starwhale 模型

    使用 swcli 创建 Starwhale 模型之前,您可以定义一个model.yaml,其中描述了关于Starwhale模型的一些必要信息,然后运行以下命令:

    swcli model build . --model-yaml /path/to/model.yaml

    有关该命令和 model.yaml 的更多信息,请参阅swcli参考。需要注意的是,model.yaml 是非必要的。

    使用 Python SDK 创建 Starwhale 模型

    from starwhale import model, predict

    @predict
    def predict_img(data):
    ...

    model.build(name="mnist", modules=[predict_img])

    管理 Starwhale 模型

    使用 swcli 管理 Starwhale 模型

    命令说明
    swcli model list列出项目中所有Starwhale模型
    swcli model info显示有关Starwhale模型的详细信息
    swcli model copy将Starwhale模型复制到另一个位置
    swcli model remove删除Starwhale模型
    swcli model recover恢复之前删除的Starwhale模型

    使用 Web 界面管理 Starwhale 模型

    管理 Starwhale 模型的历史版本

    Starwhale 模型是版本化的。关于版本的基本信息可以参考 Starwhale中的资源版本控制

    使用 swcli 管理 Starwhale 模型的历史版本

    命令说明
    swcli model history列出Starwhale模型的所有版本
    swcli model info显示某个Starwhale模型版本的详细信息
    swcli model diff比较两个版本的Starwhale模型
    swcli model copy复制某个Starwhale模型版本到新的版本
    swcli model remove删除某个Starwhale模型版本
    swcli model recover恢复以前删除的Starwhale模型版本

    模型评估

    使用swcli进行模型评估

    命令说明
    swcli model run指定某个Starwhale模型进行模型评估

    存储格式

    Starwhale模型是一个打包了原始目录的tar文件。

    - + \ No newline at end of file diff --git a/zh/next/model/yaml/index.html b/zh/next/model/yaml/index.html index 6d3cdc74b..57ec051a2 100644 --- a/zh/next/model/yaml/index.html +++ b/zh/next/model/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    model.yaml 使用指南

    提示

    model.yaml 对于 swcli model build 构建模型的过程是非必要的。

    Starwhale Model 构建时,若使用 swcli model build 命令,可以通过 --model-yaml 参数指定符合特定格式的yaml文件,简化模型构建的参数指定。

    即使不指定 --model-yaml 参数,swcli model build 也会自动寻找 ${workdir} 目录下的 model.yaml 文件,会提取其中的参数。swcli model build 命令行中指定参数优先级大于 model.yaml 中的等价配置,可以认为 model.yaml 是 build 命令行的配置文件化表述。

    当使用 Python SDK 方式构建 Starwhale 模型时,model.yaml 文件不生效。

    YAML 字段描述

    字段描述是否必要类型默认值
    nameStarwhale Model 的名字,等价于 --name 参数。String
    run.modules模型构建时搜索的Python Modules,可以指定多个模型运行的入口点,格式为 Python 可 Imported 路径。等价于 --module 参数。是List[String]
    run.handlerrun.modules的曾用写法,只能指定一个模型运行的入口点,已废弃String
    versionmodel.yaml格式版本,目前仅支持填写 1.0String1.0
    desc模型描述信息,等价于 --desc 参数。String

    使用示例

    name: helloworld
    run:
    modules:
    - src.evaluator
    desc: "example yaml"

    名称为 helloworld 的 Starwhale 模型,搜索 swcli model build {WORKDIR} 命令中 ${WORKDIR} 目录相对的 src/evaluator.py 文件中被 @evaluation.predict, @evaluation.evaluate 和 @handler 修饰的函数, 或继承自 PipelineHandler 的类,这些函数或类会被加入 Starwhale 模型可运行的入口点列表中,在 swcli model run 或 Web UI 运行时,选择对应的入口点(handler)运行模型。

    model.yaml 是非必要的,yaml 中定义参数可以在 swcli 命令行参数中指定。

    swcli model build . --model-yaml model.yaml

    等价于:

    swcli model build . --name helloworld --module src.evaluator --desc "example yaml"
    - + \ No newline at end of file diff --git a/zh/next/reference/sdk/dataset/index.html b/zh/next/reference/sdk/dataset/index.html index 206f896d7..2c7104c19 100644 --- a/zh/next/reference/sdk/dataset/index.html +++ b/zh/next/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 数据集 SDK

    dataset

    获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    参数

    • uri: (str 或 Resource, required)
      • Dataset URI 格式的字符串或 Resource 对象。
    • create: (str, optional)
      • 数据集创建模式,包括 auto, empty 和 forbid 三种方式。
        • auto 模式: 如果数据集已经存在,不会自动创建数据集;如果数据集不存在,则自动创建数据集。
        • empty 模式: 如果数据集已经存在,则抛出异常;如果数据集不存在,则自动创建数据集。
        • forbid 模式: 如果数据集已经存在,则不做任何事情;如果数据集不存在,则抛出异常。forbid 模式能确保数据集存在。
      • auto 模式是默认值。
    • readonly: (bool, optional)
      • 对于已经存在的数据集,可以指定 readonly=True 保证数据集以只读方式加载。
      • 默认值为 False

    使用示例

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # return False, "mnist" dataset is not existing.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset 实现 Starwhale 数据集的抽象,能够对Standalone/Server/Cloud 实例上的数据集进行操作。

    from_huggingface

    from_huggingface 是一个 classmethod 方法,能够将 Huggingface 上的数据集转化为 Starwhale 数据集。

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称。
    • repo: (str, required)
      • Huggingface 的数据集 repo 名称。
    • subset: (str, optional)
      • Huggingface的数据集 subset 名称,如果HF数据集有多个 subsets, 您务必要指定一个 subset。
    • split: (str, optional)
      • Huggingface的数据集中 Split 名称。如果没有指定 split,则数据集中所有的 splits 数据都会被构建。
    • revision: (str, optional)
      • Huggingface的数据集版本,默认是 main,即main分支的最新一次提交。参数接受branch, tag 或 commit hash。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • cache: (bool, optional)
      • 是否使用 Huggingface 的本地缓存。
      • 默认使用缓存。
      • 缓存 = 下载文件缓存 + 本地Huggingface 数据集缓存。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json 是一个 classmethod 方法,能够将 json 字符串转化为 Starwhale 数据集。

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称
    • json_text: (str, required)
      • json 字符串,from_json 函数会序列化该字符串为 Python 对象,然后开始构建 Starwhale 数据集。
    • field_selector: (str, optional)
      • 可以提取 json_text 中特定的 array 结构。
      • 默认从 json 的根提取数据。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder 是一个 classmethod 方法,能够读取指定目录中的 Image/Video/Audio 数据,并将其自动转化为 Starwhale 数据集。该函数支持如下特性:

    • 能够递归的搜索目标目录及子目录
    • 支持三种类型的文件提取:
      • image: 支持 png/jpg/jpeg/webp/svg/apng 图片类型。图片文件会被转化为 Starwhale.Image 类型。
      • video: 支持 mp4/webm/avi 视频类型。视频文件会被转化为 Starwhale.Video 类型。
      • audio: 支持 mp3/wav 音频类型。音频文件会被转化为 Starwhale.Audio 类型。
    • 每个文件对应数据集的一条记录,文件对应的数据集字段名称为 file
    • auto_label=True,则会使用父目录的名称作为该条数据的标签,对应 label 字段。根目录下的文件,则不会被打标签。
    • 若存在与 image/video/audio 同名的 txt 文件,则该文件内容会被作为 caption 字段内容存放到数据集中。
    • 若根目录存在 metadata.csv 或 metadata.jsonl 文件,则会自动读取文件的内容,并将其通过文件路径名作为关联,存入数据集中,可以用来指定 meta 信息。
      • metadata.csv 和 metadata.jsonl 文件是互斥的,当都存在的时候,程序会抛出异常。
      • metadata.csv 和 metadata.jsonl 每行记录中需要包含 file_name 字段,指向对应文件的路径。
      • metadata.csv 和 metadata.jsonl 对于数据集构建是可选的。
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • folder: (str|Path, required)
      • 文件夹路径
    • kind: (str|DatasetFolderSourceType, required)
      • 数据类型设置,目前支持 image, video 和 audio 三种类型。
      • 会根据设置的 kind 值,在 folder 中递归寻找对应类型的文件。其他类型文件会被忽略掉。
    • name: (str|Resource, optional)
      • 数据集名称。
      • 若不指定,则使用目录名称作为数据集名称。
    • auto_label: (bool, optional)
      • 是否根据父目录的名字自动对每条记录打标签。
      • 默认为 True
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例 ${folder-example}

    • 函数调用示例

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • caption 示例

      folder/dog/1.png
      folder/dog/1.txt

      1.txt 中的内容,会填充到 1.png 所在行中 caption 字段中。

    • metadata.csv 和 metadata.jsonl 示例

      metadata.csv 内容:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl 内容:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • 自动 label 示例

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

      生成的数据集中包含四条数据,分为 dog 和 cat 两类。

    __iter__

    __iter__ 是一个 method 方法,能够对数据集进行迭代。

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label 和 img 是 mnist数据集中的数据列
    print(item.features.img)

    batch_iter

    batch_iter 是一个 method 方法,能够批量的进行数据集迭代。

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    参数

    • batch_size: (int, optional)
      • batch的大小,默认值为1。
    • drop_not_full: (bool, optional)
      • 最后一组batch数据数量小于 batch_size 时,该组数据是否会被抛弃掉。
      • 默认是不抛弃。

    使用示例

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ 是一个 method 方法,能提供数据集中某些行数据的获取,操作方式类似 Python 的 dict 和 list 类型。

    from starwhale import dataset

    ds = dataset("mock-str-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-int-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ 是一个 method 方法,能提供数据集中行数据的更新,操作方式类似 Python 的 dict 类型。__setitem__ 支持多线程并行插入数据。

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    参数

    • key: (int|str, required)
      • key 即为数据集中每行的 index,类型为 int 或 str,一个数据集中只接受一种类型。
    • value: (DataRow|tuple|dict, required)
      • value 即为数据集中每行的 features,一般建议用 Python 的 dict 类型。

    使用示例

    • 插入数据

    test 数据中插入两条数据,index分别为 testtest2

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • 并行插入数据
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ 是一个 method 方法,用来删除数据集中的某些行数据。

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append 是一个 method 方法,用来向数据集中添加数据,类似 Python list 的 append 函数。

    • 添加 features dict,每行数据自动 index 为 int 类型,从0开始自增。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • 添加 index + features dict,数据集中每行数据的 index 不会被自动处理。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend 是一个 method 方法,用来向数据集中批量添加数据,类似 Python list 的 extend 函数。

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit 是一个 method 方法,调用 commit 时会将当前缓存中数据 flush 到存储中,并产生一个数据集版本,后续可以用这个版本信息加载相应的数据集内容。

    对于一个数据集,如果添加一些数据后,并没有调用 commit 方法,而是直接调用 close 或退出进程,那么这些数据依旧会写入到数据集中,只是没有生成一个新的版本。

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    参数

    • tags: (List(str), optional)
      • 指定 tags,可以指定多个tag。
    • message: (str, optional)
      • 提交信息,默认为空。
    • force_add_tags: (bool, optional)
      • 当给该版本添加标签时,对于 server/cloud 实例,若标签已经被应用到其他数据集版本时,可以使用 force_add_tags=True 参数强制将标签添加到此版本上,否则会抛出异常。
      • 默认为 False
    • ignore_add_tags_errors: (bool, optional)
      • 忽略添加标签时抛出的异常。
      • 默认为 False

    使用示例

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly 是一个 property 属性,表示数据集是否只读,返回值为 bool 类型。

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version 是一个 property 属性,字符串类型。

    • 当加载一个已经存在的数据集时,返回的是数据集加载的对应版本。
    • 对加载一个不存在的数据集时,返回的是 pending_commit_version

    pending_commit_version

    pending_commit_version 是一个 property 属性,字符串类型,表示待提交的版本信息。当调用 commit 方法后,pending_commit_version 会变成 committed_version

    committed_version

    committed_version 是一个 property 属性,字符串类型,表示已经调用 commit 方法后生成的版本信息。当没有调用 commit 方法时,访问该属性时程序会抛出异常。

    remove

    remove 是一个 method 方法,等价于 swcli dataset remove 命令,能够删除数据集。

    def remove(self, force: bool = False) -> None:

    recover

    recover 是一个 method 方法,等价于 swcli dataset recover 命令,能够对软删除且未GC的数据集进行恢复。

    def recover(self, force: bool = False) -> None:

    summary

    summary 是一个 method 方法,等价于 swcli dataset summary 命令,返回数据集摘要信息。

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history 是一个 method 方法,等价于 swcli dataset history 命令,返回数据集的历史记录。

    def history(self) -> t.List[t.Dict]:

    flush

    flush 是一个 method 方法,能够将内存中暂存的数据刷到持久化存储中。commitclose 方法会自动调用 flush

    close

    close 是一个 method 方法,关闭已经打开的数据集相关链接。Dataset 也实现了 contextmanager,使用 with 语法后可以自动关闭数据集,不需要主动调用 close 方法。

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head 是一个 method 方法,能够显示数据集前n行数据,等价于 swcli dataset head 命令。

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> t.List[DataRow]:

    fetch_one

    fetch_one 是一个 method 方法,获得数据集的第一条记录,相当于 head(n=1)[0]

    list

    list 是一个 classmethod 方法,能够列出项目 URI 下的 Starwhale 数据集,等价于 swcli dataset list 命令。

    @classmethod
    def list(
    cls,
    project_uri: t.Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[DatasetListType, Dict[str, Any]]:

    copy

    copy 是一个 method 方法,能够复制数据到其他实例上,等价于 swcli dataset copy 命令。

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    参数

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • 从远端复制到本地 Standalone 实例时,可以指定对应的项目 URI。
    • force: (bool, optional)
      • 当目标实例上已经有相同版本的数据集时,是否强制覆盖。
      • 默认不覆盖。
      • 当复制标签到远端 Server/Cloud 实例时,若标签已经被其他版本使用,使用 force=True 参数可以强制变更标签到本版本上。
    • mode: (str, optional)
      • 数据集复制模式,分为 patch 模式 和 overwrite 模式,默认为 patch
      • patch: 使用补丁方式更新数据集,只更新计划变更的行和列,在新生成的版本中仍能读取到未受影响的行和列。
      • overwrite: 使用覆盖方式更新数据集,会将原来的所有行都删除,然后再进行更新,在新生成的版本中读取不到老数据。但请放心,删除的数据依旧可以通过旧版本进行访问。
    • ignore_tags (List[str], optional)
      • 复制数据集时,可以忽略的自定义标签。
      • 默认会复制所有用户自定义标签到其他实例中。
      • 复制标签会忽略 latest 和 ^v\d+$ 内建标签。

    使用示例

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch 是一个 method 方法,能够将 Starwhale 数据集转化为 Pytorch 的 torch.utils.data.Dataset 类型,可以进一步传给 torch.utils.data.DataLoader 进行使用。

    需要注意的是,to_pytorch 函数返回的是 Pytorch 的 IterableDataset

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    参数

    • transform: (callable, optional)
      • 支持用户自定义变换函数,能够按需转化数据类型。
    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。
    • skip_default_transform: (bool, optional)
      • 如果没有设置 transform, 默认状态下会使用 Starwhale 内建的 transform 函数,对数据进行转化,可以通过 skip_default_transform 参数禁用内建数据转化。

    使用示例

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow 是一个 method 方法,能够将 Starwhale 数据集转化为 Tensorflow 的 tensorflow.data.Dataset 类型。

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    参数

    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。

    使用示例

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config 是一个 method 方法,用来设置 Starwhale 数据集中 blob 的相关属性信息。需要在变更数据之前调用。

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    参数

    • volume_size: (int|str, optional)
      • 单个数据集 blob 文件的大小。
      • 默认值为 64MB。
      • 当类型为 int 时,单位为 Bytes。
      • 当类型为 str 时,格式类似 1GB, 64MB。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的大小
      • 默认值为 128个字节。
      • 与 volume_size 一样的类型解析。

    使用示例

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config 是一个 method 方法,用来设置 Starwhale 数据集 loader 的过程参数。

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    参数

    • num_workers: (int, optional)
      • 加载数据集的 worker 数目,默认为2。
    • cache_size: (int, optional)
      • 预加载的数据的数量,默认为20条。
    • field_transformer: (dict, optional)
      • features 字段名称的变换。

    使用示例

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/zh/next/reference/sdk/evaluation/index.html b/zh/next/reference/sdk/evaluation/index.html index 1d84d712f..3fef1d782 100644 --- a/zh/next/reference/sdk/evaluation/index.html +++ b/zh/next/reference/sdk/evaluation/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:WIP

    Starwhale 模型评测 SDK

    @evaluation.predict

    @evaluation.predict 是一个修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取本地或远端的数据集,将数据集中的数据以单条或批量的方式,传递给 evaluation.predict 修饰的函数。
    • 通过多副本的设置,实现分布式数据集消费的功能,能以水平扩展的方式缩短模型评测任务的用时。
    • 自动将函数返回值和数据集的输入 features 存储到 results 表中,方便Web UI展示和进一步的 evaluate 阶段使用。
    • 每单条或每批量组数据会调用一次被修饰的函数,完成推理过程。

    控制参数

    • resources: (dict, optional)
      • 定义 predict 每个任务在 Server 实例上运行时所需要的资源,包括 memorycpunvidia.com/gpu 三种类型。
        • memory: 单位为 Bytes,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"memory": {"request": 100 * 1024, "limit": 200 * 1024}}。
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"memory": 100 * 1024} 等价于 resources={"memory": {"request": 100 * 1024, "limit": 100 * 1024}}。
        • cpu: 单位为 CPU 核心数,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"cpu": {"request": 1, "limit": 2}}。
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"cpu": 1.5} 等价于 resources={"cpu": {"request": 1.5, "limit": 1.5}}。
        • nvidia.com/gpu: 单位为 GPU显卡数,支持 int 类型。
          • nvidia.com/gpu 不支持设置 request 和 limit,仅支持单个数字。
      • 需要注意: resource 参数目前仅在 Server 实例中生效。Cloud 实例,通过在提交评测任务时,选择对应的资源池达到相同的作用。Standalone 实例完全不支持该特性。
    • replicas: (int, optional)
      • predict 运行的副本数。
      • predict 相当于定义了一个 Step, 在该 Step 中有若干等价的 Task,每个 Task 在 Cloud/Server 实例上运行实体是 Pod,在 Standalone 实例上运行实体是 Thread。
      • 当指定多个副本时,这些副本是等价的,它们会共同消费选定的数据集,实现分布式数据集消费的目的,可以理解为某个数据集中的某行数据,只会被一个 predict 副本读取。
      • 默认值为1。
    • batch_size: (int, optional)
      • 批量将数据集中的数据传递进函数中。
      • 默认值为1。
    • fail_on_error: (bool, optional)
      • 当被修饰的函数抛出异常时,是否中断所有模型评测。如果预期某些“异常”数据会导致评测失败,但不想中断整体评测,可以设置 fail_on_error=False
      • 默认为 True
    • auto_log: (bool, optional)
      • 是否自动记录函数返回值和数据集输入 features 到 results 表中。
      • 默认为 True
    • log_mode: (str, optional)
      • auto_log=True 时,可以通过设置 log_mode 参数,定义以 plainpickle 方式记录函数返回值。
      • 默认为 pickle 方式。
    • log_dataset_features: (List[str], optional)
      • auto_log=True 时,可以通过该参数,选择性的记录数据集中的某些 features 。
      • 默认会记录所有的 features 。
    • needs: (List[Callable], optional)
      • 定义该任务运行的前置条件,可以用 needs 语法实现 DAG。
      • needs 接受被 @evaluation.predict, @evaluation.evaluate 和 @handler 修饰的函数。
      • 默认为空,不依赖任何其他任务。

    传入参数

    被修饰的函数,需要定义一些输入参数,用来接受数据集内容等,包含如下模式:

    • 单个 data 参数:

      • data 为 一个类 dict 类型,能够读取到数据集的 features 内容。
      • batch_size=1 或不设置 batch_size 时,可以通过 data['label']data.label 方式读取 label feature。
      • 当设置 batch_size > 1 时,data 为一个 list。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external 参数方式:

      • data 为数据集的features。
      • external 为一个 dict 类型,包含 index, index_with_dataset, dataset_info, context 和 dataset_uri 这些内建属性,可以用来做更细粒度的处理。
        • index: 数据集对应行的 index 信息。
        • index_with_dataset: 适用于多个数据集输入的时候做 index 区分。
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo 对象。
        • context: starwhale.Context 对象。
        • dataset_uri: starwhale.base.uri.resource.Resource 对象。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw 方式:

      • data 为数据集的features。
      • kw 可以读取到 external
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs 方式:

      • args的第一个元素为 data
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs 方式:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args 方式:

      • 此方式无法读取到 external 信息。
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    使用示例

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "memory": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate 是一个修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取 predict 阶段记录到 results 表的数据,并以迭代器的方式传入函数中。
    • evaluate 阶段只会运行一个副本,无法像 predict 阶段一样定义 replicas 参数。

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。
      • 绝大多数场景中,会依赖一个 @evaluation.predict 修饰的函数。
    • use_predict_auto_log: (bool, optional)
      • 默认为 True,传入一个能够遍历 predict 结果的迭代器到函数中。

    输入参数

    • use_predict_auto_log=True(默认)时,传入一个能够遍历 predict 结果的迭代器到函数中。
      • 迭代出来的对象为一个字典,包含 outputinput 两个key。
        • outputpredict 阶段函数返回的元素。
        • input 为推理时对应使用的数据集的 features ,为一个字典类型。
    • use_predict_auto_log=False 时,不传入任何参数到函数中。

    使用示例

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    class Evaluation

    starwhale.Evaluation 实现 Starwhale Model Evaluation 的抽象,能对Standalone/Server/Cloud实例上的Model Evaluation进行log和scan等操作,用来记录和检索指标。

    __init__

    __init__ 函数用来初始化一个 Evaluation 对象。

    class Evaluation
    def __init__(self, id: str, project: Project | str) -> None:

    参数

    • id: (str, required)
      • Evaluation 的 UUID,此ID由 Starwhale 系统自动生成。
    • project: (Project|str, required)
      • Project 对象或 Project URI 字符串。

    使用示例

    from starwhale import Evaluation

    standalone_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="self")
    server_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="cloud://server/project/starwhale:starwhale")
    cloud_e = Evaluation("2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/project/starwhale:llm-leaderboard")

    from_context

    from_context 是一个 classmethod 方法,获得当前 Context 下的 Evaluation 对象。from_context 在任务运行环境下才能生效,非任务运行环境调用该方法,会抛出 RuntimeError 异常,提示 Starwhale Context 没有被合理设置。

    @classmethod
    def from_context(cls) -> Evaluation:

    使用示例

    from starwhale import Evaluation

    with Evaluation.from_context() as e:
    e.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})

    log

    log 是一个 method 方法,记录某些评测指标到特定表中,之后可以通过 Server/Cloud 实例的 Web 页面或 scan 方法中查看相关的表。

    def log(
    self, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    参数

    • category: (str, required)
      • 记录的类别,该值会被作为 Starwhale Datastore 的表名的后缀。
      • 一个 category 会对应一张 Starwhale Datastore 的表,这些表会以评测任务ID作为隔离区分,相互不影响。
    • id: (str|int, required)
      • 记录的ID,表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • 字典类型,key-value 方式记录指标。
      • key 为 str 类型。
      • value 既支持 int, float, str, bytes, bool 等常量类型,也支持 tuple, list, dict 等复合类型。同时也支持Artifacts类型 Starwhale.Image, Starwhale.Video, Starwhale.Audio, Starwhale.Text, Starwhale.Binary 的记录。
      • 当 value 中包含 dict 类型时,Starwhale SDK会自动展平字典,便于更好的进行可视化展示和指标对比。
        • 比如 metrics 为 {"test": {"loss": 0.99, "prob": [0.98,0.99]}, "image": [Image, Image]} , 存入后会变成 {"test/loss": 0.99, "test/prob": [0.98, 0.99], "image/0": Image, "image/1": Image} 结构。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation.from_context()

    evaluation_store.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log("ppl", "1", {"a": "test", "b": 1})

    scan

    scan 是一个 method 方法,返回一个迭代器,用来读取某些模型评测表中的数据。

    def scan(
    self,
    category: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    参数

    • category: (str, required)
      • log 函数中的 category 参数含义一致。
    • start: (Any, optional)
      • 起始 Key,若不指定,则从表的第一条数据开始。
    • end: (Any, optional)
      • 结束 Key,若不指定,则一直遍历到表的结尾。
    • keep_none: (bool, optional)
      • 若某列的值为 None,是否返回该列,默认不返回。
    • end_inclusive: (bool, optional)
      • 是否包含 end 对应的行,默认不包含。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    results = [data for data in evaluation_store.scan("label/0")]

    flush

    flush 是一个 method 方法,能够将 log 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush(self, category: str, artifacts_flush: bool = True) -> None

    参数

    • category: (str, required)
      • log 函数中的 category 参数含义一致。
    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True

    log_result

    log_result 是一个 method 方法,记录评测指标到 results 表中,等价于 log 方法指定 category 参数为 results 。results 表一般用来存储推理结果,@starwhale.predict 默认情况下会将修饰函数的返回值存储在 results 表中,也可以用 log_result 手动存储。

    def log_result(self, id: t.Union[str, int], metrics: t.Dict[str, t.Any]) -> None:

    参数

    • id: (str|int, required)
      • 记录的ID,results 表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • log 函数中 metrics 参数定义一致。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")
    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})

    scan_results

    scan_results 是一个 method 方法,返回一个迭代器,用来读取 results 表中的数据。

    def scan_results(
    self,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    参数

    • start: (Any, optional)
      • 起始 Key,若不指定,则从表的第一条数据开始。
      • scan 函数中 start 参数定义一致。
    • end: (Any, optional)
      • 结束 Key,若不指定,则一直遍历到表的结尾。
      • scan 函数中 end 参数定义一致。
    • keep_none: (bool, optional)
      • 若某列的值为 None,是否返回该列,默认不返回。
      • scan 函数中 keep_none 参数定义一致。
    • end_inclusive: (bool, optional)
      • 是否包含 end 对应的行,默认不包含。
      • scan 函数中 end_inclusive 参数定义一致。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")

    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})
    results = [data for data in evaluation_store.scan_results()]

    flush_results

    flush_results 是一个 method 方法,能够将 log_result 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_results 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_results(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • flush 方法中 artifacts_flush 参数定义一致。

    log_summary

    log_summary 是一个 method 方法,记录某些指标到 summary 表中,Server/Cloud 实例评测页面显示的就是 summary 表的数据。 每次调用,Starwhale 都会自动以此次评测的唯一ID作为表的行ID进行更新,可以在一次评测过程中多次调用该函数,用来更新不同的列。

    每个项目中有一张 summary 表,所有该项目下的评测任务都会将 summary 信息写入该表中,便于进行不同模型评测的结果对比。

    def log_summary(self, *args: t.Any, **kw: t.Any) -> None:

    log 函数一致,也会对字典类型自动展平。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")

    evaluation_store.log_summary(loss=0.99)
    evaluation_store.log_summary(loss=0.99, accuracy=0.99)
    evaluation_store.log_summary({"loss": 0.99, "accuracy": 0.99})

    get_summary

    get_summary 是一个 method 方法,用来返回 log_summary 记录的信息。

    def get_summary(self) -> t.Dict:

    flush_summary

    flush_summary 是一个 method 方法,能够将 log_summary 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_summary 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_summary(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • flush 方法中 artifacts_flush 参数定义一致。

    flush_all

    flush_all 是一个 method 方法,能够将 log, log_results, log_summary 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_all 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_all(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • flush 方法中 artifacts_flush 参数定义一致。

    get_tables

    get_tables 是一个 method 方法,返回模型评测中产生的所有表的名称,需要注意的是,该函数并不返回 summary 表名称。

    def get_tables(self) -> t.List[str]:

    close

    close 是一个 method 方法,用来关闭 Evaluation 对象。close 调用时会将,会自动刷新数据到存储中。同时 Evaluation 也实现了 __enter____exit__ 方法,可以用 with 语法简化 close 的手工调用。

    def close(self) -> None:

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    evaluation_store.log_summary(loss=0.99)
    evaluation_store.close()

    # auto close when the with-context exits.
    with Evaluation.from_context() as e:
    e.log_summary(loss=0.99)

    @handler

    @handler 是一个修饰器,具备如下功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 可以控制副本数。
    • 多个 Handlers 可以通过依赖关系,生成DAG,便于控制执行流程。
    • 可以对外暴露端口,以类似 Web Handler 方式运行。

    @fine_tune, @evaluation.predict 和 @evaluation.evaluate 可以认为是 @handler 在某些特定领域的应用,@handler 是这些修饰器的底层实现。@handler 更为基础和灵活。

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。
    • replicas: (int, optional)
      • @evaluation.predict 中的 replicas 参数定义保持一致。
    • name: (str, optional)
      • 显示 handler 时候用的名字。
      • 若不指定,则用修饰函数的名字。
    • expose: (int, optional)
      • 对外暴露的端口,当运行一个 Web Handler的时候,需要声明暴露的端口。
      • 默认为0,表示不暴露任何端口。
      • 目前只能暴露一个端口。
    • require_dataset: (bool, optional)
      • 定义此 Handler 运行时,是否需要数据集。
      • 如果 required_dataset=True,在 Server/Cloud 实例的 Web 界面创建评测任务的时候,需要让用户强制输入数据集;如果 required_dataset=False,则 Web 界面中不需要用户指定数据集。
      • 默认为 False

    使用示例

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune 是一个修饰器,定义模型训练的微调(fine-tune)过程。

    一些限制和使用建议:

    • fine_tune 只有一个副本。
    • fine_tune 需要有数据集输入。
    • 一般在 fine_tune 开始时,通过 Context.get_runtime_context() 获取数据集。
    • 一般在 fine_tune 结束时,通过 starwhale.model.build 生成微调后的 Starwhale 模型包,该模型包会被自动复制到评测对应的项目中。

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。

    使用示例

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    @multi_classification 修饰器使用sklearn lib对多分类问题进行结果分析,输出confusion matrix, roc, auc等值,并且会写入到 starwhale DataStore 相关表中。 使用的时候需要对所修饰的函数返回值有一定要求,返回(label, result, probability_matrix)(label, result)

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    参数

    • confusion_matrix_normalize: (str, optional)
      • 接收三种参数:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • 是否计算hamming loss。
      • 默认为 True
    • show_cohen_kappa_score: (bool, optional)
      • 是否计算 cohen kappa score。
      • 默认为 True
    • show_roc_auc: (bool, optional)
      • 是否计算roc/auc, 计算的时候,需要函数返回(label,result, probability_matrix) 三元组,否则只需返回(label, result) 两元组即可。
      • 默认为 True
    • all_labels: (List, optional)
      • 定义所有的Labels。

    使用示例


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    PipelineHandler 是一个类,提供默认的模型评测过程定义,需要用户实现 predictevaluate 函数。

    PipelineHandler 等价于 @evaluation.predict + @evaluation.evaluate,展示使用方式不一样,背后的模型评测过程一致。

    用户需要实现如下函数:

    • predict: 定义推理过程,等价于 @evaluation.predict 修饰的函数。
    • evaluate: 定义评测过程,等价于 @evaluation.evaluate 修饰的函数。
    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    参数

    • predict_batch_size: (int, optional)
      • 等价于 @evaluation.predict 中的 batch_size 参数。
      • 默认值为1。
    • ignore_error: (bool, optional)
      • 等价于 @evaluation.predict 中的 fail_on_error 参数。
      • 默认值为 False
    • predict_auto_log: (bool, optional)
      • 等价于 @evaluation.predict 中的 auto_log 参数。
      • 默认值为 True
    • predict_log_mode: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_mode 参数。
      • 默认值为 pickle
    • predict_log_dataset_features: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_dataset_features 参数。
      • 默认值为空,即记录所有 features。

    PipelineHandler.run 修饰符

    PipelineHandler.run 修饰符可以对 predictevaluate 方法进行资源描述,支持 replicasresources 的定义:

    • PipelineHandler.run 只能修饰继承自 PipelineHandler 子类中的 predictevaluate方法。
    • predict 方法可以设置 replicas 参数。evaluate 方法的 replicas 值永远为1。
    • resources 参数与 @evaluation.predict@evaluation.evaluate 中的 resources 参数定义和使用方法保持一致。
    • PipelineHandler.run 修饰器是可选的。
    • PipelineHandler.run 仅在 Server 和 Cloud 实例中生效,Standalone 实例不支持资源定义。
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    使用示例

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    执行模型评测过程中传入的上下文信息,包括Project、Task ID等。Context 的内容是自动注入的,可以通过如下方式使用:

    • 继承 PipelineHandler 类内使用 self.context 对象。
    • 通过 Context.get_runtime_context() 获取。

    需要注意,只有在模型评测过程中,才能使用Context,否则程序会抛出异常。

    目前Context可以获得如下值:

    • project: str
      • Project 名字。
    • version: str
      • 模型评测的唯一ID。
    • step: str
      • Step 名字。
    • total: int
      • Step 下所有 Task 的数量。
    • index: int
      • Task 索引标号,下标从0开始。
    • dataset_uris: List[str]
      • Starwhale 数据集的URI 列表。

    使用示例


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api 是一个修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,当用户使用 swcli model serve 命令启动 Web Service 接收外部请求,并将推理结果返回给用户,实现在线评测。

    使用示例

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    如果希望自定义 web service 的实现, 可以继承 Service 并重写 serve 函数即可。

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    说明:

    • 使用 PipelineHandler.add_api 函数添加的 handler 和 api 以及实例化的 Service.api decorator 添加的 handler 可以同时生效
    • 如果使用自定义的 Service, 需要在 model 中实例化自定义的 Service 类

    自定义 Request 和 Response

    Request 和 Response 分别是用于接收用户请求和返回给用户结果的处理类, 可以简单的理解成是 handler 的前处理和后处理逻辑

    Starwhale 将支持 Dataset 内置类型的 Request 实现以及 Json Response 的实现, 同时用户可以自定义处理逻辑来使用, 自定义的示例如下:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req


    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/zh/next/reference/sdk/job/index.html b/zh/next/reference/sdk/job/index.html index 694baf4c3..5f5e06f08 100644 --- a/zh/next/reference/sdk/job/index.html +++ b/zh/next/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 任务 SDK

    job

    通过Job URI参数获取 starwhale.Job 对象,可以获得 Standalone/Server/Cloud 实例上的任务。

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    参数

    • uri: (str, required)
      • Job URI格式的字符串。

    使用示例

    from starwhale import job
    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")
    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job 实现对 Starwhale 任务的抽象,能够对 Standalone/Server/Cloud 实例上的任务进行一些信息获取类的操作。

    list

    list 是一个 classmethod 方法,能够列出某个项目下的任务。

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> t.Tuple[t.List[Job], t.Dict]:

    参数

    • project: (str, optional)
      • Project URI,Standalone/Server/Cloud 实例上的项目都可以。
      • 若不指定 project 参数,则使用 swcli project selected 命令选定的项目。
    • page_index: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定页面序号。
        • 默认值为 1。
        • 页面起始序号为 1。
      • Standalone 实例不支持翻页操作,设置该参数无效。
    • page_size: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定每页返回的任务数量。
        • 默认值为 DEFAULT_PAGE_SIZE。
      • Standalone 实例不支持翻页操作,设置该参数无效。

    使用示例

    from starwhale import Job
    # list jobs of current selected project
    jobs, pagination_info = Job.list()
    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")
    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get 是一个 classmethod 方法,能够获得某个特定任务的信息,返回 Starwhale.Job 对象,与 starwhale.job 函数功能和参数定义上完全一致。

    使用示例

    from starwhale import Job
    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")
    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary 是一个 property 属性,返回任务运行中写入 summary 表中的数据,字典类型。

    @property
    def summary(self) -> t.Dict[str, t.Any]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.summary)

    tables

    tables 是一个 property 属性,返回任务运行中创建的表名(不包括 summary 表,因为 summary 表是项目级别自动创建的),列表类型。

    @property
    def tables(self) -> t.List[str]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.tables)

    get_table_rows

    get_table_rows 是一个 method 方法,可以根据表名等参数返回数据表的记录,迭代器类型。

    def get_table_rows(
    self,
    name: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator[t.Dict[str, t.Any]]:

    参数

    • name: (str, required)
      • datastore 表名。通过 tables 属性获得的表名,可以传给 name 参数。
    • start: (Any, optional)
      • 返回记录中,ID的起始值。
      • 默认值为 None,表示从头开始。
    • end: (Any, optional)
      • 返回记录中,ID的结束值。
      • 默认值为 None ,表示一直到表末尾。
      • startend 都为 None,则会以迭代器方式返回整个表的数据。
    • keep_none: (bool, optional)
      • 是否返回值为 None的记录。
      • 默认为 False。
    • end_inclusive: (bool, optional)
      • end 参数设置时,迭代记录的时候,是否包含end记录。
      • 默认为 False。

    使用示例

    from starwhale import job
    j = job("local/project/self/job/xm5wnup")
    table_name = j.tables[0]
    for row in j.get_table_rows(table_name):
    print(row)
    rows = list(j.get_table_rows(table_name, start=0, end=100))
    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]

    status

    status 是一个 property 属性,返回当前Job的实时状态,字符串类型,状态包含 CREATED, READY, PAUSED, RUNNING, CANCELLING, CANCELED, SUCCESS, FAILUNKNOWN

    @property
    def status(self) -> str:

    create

    create 是一个 classmethod 方法,能够创建 Standalone 实例或 Server/Cloud 实例上的任务,包括 Model Evaluation, Fine-tuning, Online Serving 和 Developing 等类型的任务。函数返回 Job 类型的对象。

    • create 通过 project 参数决定生成的任务运行在何种实例上,包括 Standalone 和 Server/Cloud 实例。
    • 在 Standalone 实例下,create 创建一个同步执行的任务。
    • 在 Server/Cloud 实例下, create 创建一个异步执行的任务。
    @classmethod
    def create(
    cls,
    project: Project | str,
    model: Resource | str,
    run_handler: str,
    datasets: t.List[str | Resource] | None = None,
    runtime: Resource | str | None = None,
    resource_pool: str = DEFAULT_RESOURCE_POOL,
    ttl: int = 0,
    dev_mode: bool = False,
    dev_mode_password: str = "",
    dataset_head: int = 0,
    overwrite_specs: t.Dict[str, t.Any] | None = None,
    ) -> Job:

    参数

    对所有实例都生效的参数:

    • project: (Project|str, required)
      • Project 对象或 Project URI 字符串。
    • model: (Resource|str, required)
      • Model URI 字符串或 Model 类型的 Resource 对象,表示要运行的 Starwhale 模型包。
    • run_handler: (str, required)
      • Starwhale 模型包中对应的可运行的 handler 名称,比如 mnist 的 evaluate handler: mnist.evaluator:MNISTInference.evaluate
    • datasets: (List[str | Resource], optional)
      • Starwhale 模型包运行所需要的数据集,非必需。

    仅对 Standalone 实例生效的参数:

    • dataset_head: (int, optional)
      • 一般用于调试场景,只使用数据集前 N 条数据来供 Starwhale 模型来消费。

    仅对 Server/Cloud 实例生效的参数:

    • runtime: (Resource | str, optional)
      • Runtime URI 字符串或 Runtime 类型的 Resource 对象,表示要运行任务所需要的 Starwhale 运行时。
      • 当不指定该参数时,会尝试使用 Starwhale 模型包的内建运行时。
      • 创建 Standalone 实例下的任务,使用 Python 脚本所用的 Python 解释器环境作为自己的运行时,不支持通过 runtime 参数指定运行时。若有指定运行时的需要,可以使用 swcli model run 命令。
    • resource_pool: (str, optional)
      • 指定任务运行在哪个资源池中,默认为 default 资源池。
    • ttl: (int, optional)
      • 任务最大存活时间,超时后会被杀掉。
      • 参数单位为秒。
      • 默认情况下,ttl为0,表示没有超时限制,任务会按预期运行。
      • 当ttl小于0时,也表示没有超时限制。
    • dev_mode: (bool, optional)
      • 是否设置为调试模式。开启此模式后,可以通过VSCode Web进入到相关环境中。
      • 默认不进入调试模式。
    • dev_mode_password: (str, optional)
      • 调试模式下VSCode Web的登录密码。
      • 默认为空,此时会用任务的UUID作为密码,可以通过 job.info().job.uuid 获得。
    • overwrite_specs: (Dict[str, Any], optional)
      • 支持设置 handler 的 replicasresources 字段。
      • 若为空,则使用模型包中对应 handler 设置的值。
      • overwrite_specs 的 key 为 handler 的名字,比如 mnist 的 evaluate handler: mnist.evaluator:MNISTInference.evaluate
      • overwrite_specs 的 value 为设置的值,字典格式,支持设置 replicasresources , 比如 {"replicas": 1, "resources": {"memory": "1GiB"}}

    使用示例

    • 创建一个 Cloud 实例的任务
    from starwhale import Job
    project = "https://cloud.starwhale.cn/project/starwhale:public"
    job = Job.create(
    project=project,
    model=f"{project}/model/mnist/version/v0",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=[f"{project}/dataset/mnist/version/v0"],
    runtime=f"{project}/runtime/pytorch",
    overwrite_specs={"mnist.evaluator:MNISTInference.evaluate": {"resources": "4GiB"},
    "mnist.evaluator:MNISTInference.predict": {"resources": "8GiB", "replicas": 10}}
    )
    print(job.status)
    • 创建一个 Standalone 实例的任务
    from starwhale import Job
    job = Job.create(
    project="self",
    model="mnist",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=["mnist"],
    )
    print(job.status)
    - + \ No newline at end of file diff --git a/zh/next/reference/sdk/model/index.html b/zh/next/reference/sdk/model/index.html index 924d9ba12..f37ad3c49 100644 --- a/zh/next/reference/sdk/model/index.html +++ b/zh/next/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 模型 SDK

    model.build

    model.build 是一个函数,能够构建 Starwhale 模型,等价于 swcli model build 命令。

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    参数

    • modules: (List[str|object], optional)
      • 构建时导入的模块,为列表类型,可以指定多个模块。
      • 模块类型包含两种:
        • 字符串类型: Python 可 Import 的路径,比如 "to.path.module", "to.path.module:object" 。
        • Python 对象: model.build 函数会自动解析所对应的模块。
      • 如果不指定,则会搜索当前已经导入的模块。
    • name: (str, optional)
      • Starwhale 模型的名称。
      • 若不指定,则会使用 cwd 目录名作为 Starwhale 模型的名称。
    • workdir: (str, Pathlib.Path, optional)
      • Starwhale 模型打包的根目录,此目录下的文件会被打包。
    • project_uri: (str, optional)
      • Project URI,表示该模型属于哪个项目。
      • 默认为 swcli project select 选择的项目。
    • desc: (str, optional)
      • 描述信息,默认为空。
    • remote_project_uri: (str, optional)
      • 其他实例的项目 URI,构建完 Starwhale 模型后,会被自动复制到远端实例中。
    • add_all: (bool, optional)
      • Starwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
      • 默认为 False
    • tags: (List[str], optional)
      • 用户自定义标签。
      • 不能指定 latest 和 ^v\d+$ 这两个 Starwhale 系统内建标签。

    使用示例

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/zh/next/reference/sdk/other/index.html b/zh/next/reference/sdk/other/index.html index 07e9dba29..6fe0095f1 100644 --- a/zh/next/reference/sdk/other/index.html +++ b/zh/next/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    其他 SDK

    __version__

    Starwhale Python SDK 和 swcli 版本,是字符串常量。

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    init_logger 用来设置日志输出级别。默认为0

    • 0: 输出 errors 信息,traceback 呈现最近的1个堆栈。
    • 1: 输出 errors + warnings 信息,traceback 呈现最近的5个堆栈内容。
    • 2: 输出 errors + warnings + info 信息,traceback 呈现最多10个堆栈内容。
    • 3: 输出 errors + warnings + info + debug 信息,traceback 呈现最多100个堆栈内容。
    • >=4: 输出 errors + warnings + info + debug + trace 信息,traceback 呈现最多1000个堆栈内容。
    def init_logger(verbose: int = 0) -> None:

    login

    登录 server/cloud 实例,等价于 swcli instance login 命令。登录 Standalone 实例是无意义的。

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    参数

    • instance: (str, required)
      • server/cloud 实例的 http url。
    • alias: (str, optional)
      • 实例的别名,可以简化 Starwhale URI 中 instance部分。
      • 若不指定,则使用实例的 http url 中 hostname 部分。
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • username + password 或 token 只能选择一种方式登录实例。

    使用示例

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    登出 server/cloud 实例, 等价于 swcli instance logout 命令。登出 Standalone 实例是无意义的。

    def logout(instance: str) -> None:

    使用示例

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/zh/next/reference/sdk/overview/index.html b/zh/next/reference/sdk/overview/index.html index 73fb0d262..9ab02ce7f 100644 --- a/zh/next/reference/sdk/overview/index.html +++ b/zh/next/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Python SDK 概览

    Starwhale 提供一系列的 Python SDK,帮助用户管理数据集、模型和评测等调用,使用 Starwhale Python SDK 能让您更好的完成 ML/DL 开发任务。

    • class PipelineHandler: 提供默认的模型评测过程定义,需要用户实现 predict 和 evaluate 函数。
    • class Context: 执行模型评测过程中传入的上下文信息,包括 Project、Task ID 等。
    • class Dataset: Starwhale 数据集类。
    • class starwhale.api.service.Service: 在线评测的基础类。
    • class Job: 提供 Job 相关的操作。
    • class Evaluation: 提供 Evaluation 的 log 和 scan 相关的操作。

    函数

    • @multi_classification: 修饰器,适用于多分类问题,用来简化 evaluate 结果的进一步计算和结果存储,能更好的呈现评测结果。
    • @handler: 修饰器,定义一个带有资源属性(mem/cpu/gpu)的运行实体,可以控制副本数。多个Handlers可以通过依赖关系,生成DAG,便于控制执行流程。
    • @evaluation.predict: 修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段。
    • @evaluation.evaluate: 修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段。
    • model.build: 进行 Starwhale 模型构建。
    • @fine_tune: 修饰器,定义模型训练的微调(fine-tune)过程。
    • init_logger: 设置日志输出级别,实现五种级别日志输出。
    • dataset: 获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。
    • @starwhale.api.service.api: 修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,实现在线评测。
    • login: 登录 server/cloud 实例。
    • logout: 登出 server/cloud 实例。
    • job: 根据Job URI获得 starwhale.Job 对象。
    • @PipelineHandler.run: 修饰器,定义 PipelineHandler 子类中 predict 和 evaluate 方法的资源。

    数据类型

    • COCOObjectAnnotation: 提供COCO类型的定义。
    • BoundingBox: 边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height 。
    • ClassLabel: 描述label的数量和类型。
    • Image: 图片类型。
    • GrayscaleImage: 灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。
    • Audio: 音频类型。
    • Video: 视频类型。
    • Text: 文本类型,默认为 utf-8 格式,用来存储大文本。
    • Binary: 二进制类型,用 bytes 存储,用来存储比较大的二进制内容。
    • Line: 直线类型。
    • Point: 点类型。
    • Polygon: 多边形类型。
    • Link: Link类型,用来制作 remote-link 类型的数据。
    • MIMEType: 描述 Starwhale 支持的多媒体类型,用在 Image 、 Video 等类型的 mime_type 属性上,能更好的进行 Dataset Viewer。

    其他

    • __version__: Starwhale Python SDK 和 swcli 版本,是字符串常量。

    进一步阅读建议

    - + \ No newline at end of file diff --git a/zh/next/reference/sdk/type/index.html b/zh/next/reference/sdk/type/index.html index 172f2f9bc..d31caa012 100644 --- a/zh/next/reference/sdk/type/index.html +++ b/zh/next/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 数据类型 SDK

    COCOObjectAnnotation

    提供COCO类型的定义。

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    参数说明
    idobject id,一般为全局object的递增id
    image_idimage id,一般为图片id
    category_idcategory id,一般为目标检测中类别的id
    segmentation物体轮廓表示,Polygon(多边形的点)或RLE格式
    areaobject面积
    bbox表示bounding box,可以为BoundingBox类型或float的列表
    iscrowd0表示是一个单独的object,1表示两个没有分开的object

    使用示例

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    提供灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width和Height,channel默认为1
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage函数

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    提供边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height 。

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    参数说明
    xleft_x的坐标
    ytop_y的坐标
    width图片的宽度
    height图片的高度

    ClassLabel

    描述label的数量和类型。

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    图片类型。

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width、Height和channel
    mime_typeMIMEType支持的类型
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image函数

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    视频类型。

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp视频的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    音频类型。

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp音频文件的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio函数

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    文本类型,默认为 utf-8 格式。

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    参数说明
    contenttext内容
    encodingtext的编码格式

    使用示例

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text函数

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    二进制类型,用bytes存储。

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    参数说明
    fp路径、IO对象或文件内容的bytes
    mime_typeMIMEType支持的类型

    Binary函数

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link类型,用来制作 remote-link 类型的数据集。

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    参数说明
    uri原始数据的uri地址,目前支持localFS和S3两种协议
    authLink Auth信息
    offset数据相对uri指向的文件偏移量
    size数据大小
    data_typeLink指向的实际数据类型,目前支持 Binary, Image, Text, Audio 和 Video 类型

    Link函数

    Link.astype

    astype() -> Dict[str, t.Any]

    MIMEType

    描述Starwhale支持的多媒体类型,用Python Enum类型实现,用在 Image 、 Video 等类型的mime_type 属性上,能更好的进行Dataset Viewer。

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    Line

    描述直线。

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    描述点。

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    描述多边形。

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/dataset/index.html b/zh/next/reference/swcli/dataset/index.html index 4fab9c4bf..3baa61826 100644 --- a/zh/next/reference/swcli/dataset/index.html +++ b/zh/next/reference/swcli/dataset/index.html @@ -10,7 +10,7 @@ - + @@ -21,7 +21,7 @@ | --page | N | Integer | 1 | 起始页码,仅限Server和Cloud实例。 | | --size | N | Integer | 20 | 一页中的数据集数量,仅限Server和Cloud实例。 | | --filter-fl | N | String | | 仅显示符合条件的数据集。该选项可以在一个命令中被多次重复使用。 |

    过滤器类型说明范例
    nameKey-Value数据集名称前缀--filter name=mnist
    ownerKey-Value数据集所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli dataset recover

    swcli [全局选项] dataset recover [选项] <DATASET>

    dataset recover 恢复以前删除的Starwhale数据集或版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 数据集或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale数据集或版本会被强制覆盖。

    swcli dataset remove

    swcli [全局选项] dataset remove [选项] <DATASET>

    dataset remove 删除指定的 Starwhale 数据集或某个版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则删除指定数据集的所有版本。软删除的 Starwhale 数据集,可以通过 swcli dataset recover 命令进行恢复(未进行垃圾回收)。

    被删除的Starwhale数据集或版本可以通过 swcli dataset list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale数据集或版本。删除后不可恢复。

    swcli dataset summary

    swcli [全局选项] dataset summary <DATASET>

    显示数据集摘要信息。DATASET 是一个数据集URI

    swcli dataset tag

    swcli [全局选项] dataset tag [选项] <DATASET> [TAGS]...

    dataset tag 将标签附加到指定的Starwhale数据集版本,同时支持删除和列出所有标签的功能。可以在数据集URI中使用标签替代版本ID。

    DATASET是一个数据集URI

    每个数据集版本可以包含任意数量的标签,但同一数据集中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的数据集已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    数据集标签的例子

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/index.html b/zh/next/reference/swcli/index.html index aca298ed0..1b2cca91c 100644 --- a/zh/next/reference/swcli/index.html +++ b/zh/next/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    概述

    使用方式

    swcli [选项] <COMMAND> [参数]...
    备注

    swcli 、 sw 、 starwhale 三个命令的作用是一样的。

    全局选项

    选项说明
    --version显示swcli的版本信息。
    --verbose-v日志中输出更多信息,当 -v 参数越多,呈现信息越多,最多支持4个 -v 参数。
    --help输出命令帮助信息。
    警告

    需要注意的是,全局参数需要跟在swcli之后,命令之前。

    命令

    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/instance/index.html b/zh/next/reference/swcli/instance/index.html index 6fe925d69..6dc09eb32 100644 --- a/zh/next/reference/swcli/instance/index.html +++ b/zh/next/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    swcli instance

    概述

    swcli [全局选项] instance [选项] <SUBCOMMAND> [参数]

    instance命令包括以下子命令:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [全局选项] instance info [选项] <INSTANCE>

    instance info 输出指定 Starwhale 实例的详细信息。

    INSTANCE 是一个实例URI

    swcli instance list

    swcli [全局选项] instance list [选项]

    instance list 显示所有的 Starwhale 实例。

    swcli instance login

    swcli [全局选项] instance login [选项] <INSTANCE>

    instance login 连接到一个 Server/Cloud 实例并将它设置为默认实例.

    INSTANCE 是一个实例URI

    选项必填项类型默认值说明
    --usernameNString登录用户名
    --passwordNString登录密码
    --tokenNString登录令牌
    --aliasYString实例别名。您可以在任何需要实例URI的地方使用对应的别名替代。

    --username--password 不能和 --token 一起使用。

    swcli instance logout

    swcli [全局选项] instance logout [INSTANCE]

    instance logout 断开和 Server/Cloud 实例的连接并清除本地保存的信息。

    INSTANCE是一个实例URI。如果不指定,将使用默认实例

    swcli instance use

    swcli [全局选项] instance use <INSTANCE>

    instance use 将指定的实例设置为默认实例.

    INSTANCE 是一个实例URI

    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/job/index.html b/zh/next/reference/swcli/job/index.html index 987ea9d05..bed3cca22 100644 --- a/zh/next/reference/swcli/job/index.html +++ b/zh/next/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    swcli job

    概述

    swcli [全局选项] job [选项] <子命令> [参数]...

    job命令包括以下子命令:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [全局选项] job cancel [选项] <JOB>

    job cancel 停止指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job info

    swcli [全局选项] job info [选项] <JOB>

    job info 输出指定作业的详细信息。

    JOB 是一个作业URI

    swcli job list

    swcli [全局选项] job list [选项]

    job list显示所有的 Starwhale 作业。

    选项必填项类型默认值说明
    --projectNString要查看的项目的 URI。如果未指定此选项,则使用默认项目替代。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的作业。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的作业数。仅限 Server 和 Cloud 实例。

    swcli job pause

    swcli [全局选项] job pause [选项] <JOB>

    job pause 暂停指定的作业. 被暂停的作业可以使用 job resume 恢复。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    pause 和 cancel 功能上基本相同。它们的差别在于被暂停的作业会保留作业ID,在恢复时继续使用。作业的开发者需要定期保存作业数据并在恢复的时候重新加载相关数据。作业ID 可以用作保存数据的键值。

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job resume

    swcli [全局选项] job resume [选项] <JOB>

    job resume 恢复指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/model/index.html b/zh/next/reference/swcli/model/index.html index 86424f3ca..26227114f 100644 --- a/zh/next/reference/swcli/model/index.html +++ b/zh/next/reference/swcli/model/index.html @@ -10,14 +10,14 @@ - +
    版本:WIP

    swcli model

    概述

    swcli [全局选项] model [选项] <SUBCOMMAND> [参数]...

    model命令包括以下子命令:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [全局选项] model build [选项] <WORKDIR>

    model build 会将整个 WORKDIR 打包到Starwhale模型中,.swignore匹配的文件除外。

    model build 会导入 --module 参数指定的模块,然后生成运行模型所需要的配置。如果您指定的模块依赖第三方库,我们强烈建议您使用 --runtime 选项。如果不指定该选项,您需要确保 swcli 所使用的 Python 环境已经安装了相关的依赖库。

    选项必填项类型默认值说明
    --project-pNString默认项目项目URI
    --model-yaml-fNString${workdir}/model.yamlmodel.yaml 文件路径,默认会尝试使用 ${workdir}/model.yaml 文件。model.yaml 对于模型构建并非必需的。
    --module-mNString构建时导入的模块。Starwhale 会将这些模块中包含的 handler 导出到模型包。该参数可以指定多次,用来导入多个 Python 模块。
    --runtimeNString运行此命令时使用的 Starwhale Runtime的URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --name-nNString模型包的名字
    --desc-dNString模型包的描述
    --package-runtime--no-package-runtimeNBooleanTrue当使用 --runtime 参数时,默认情况下,会将对应的 Starwhale 运行时变成 Starwhale 模型的内置运行时。可以通过 --no-package-runtime 参数禁用该特性。
    --add-allNBooleanFalseStarwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
    -t--tagN全局String

    Starwhale 模型构建的例子

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [全局选项] model copy [选项] <SRC> <DEST>

    model copy 将模型从 SRC 复制到 DEST,用来实现不同实例的模型分享。这里 SRCDEST 都是模型URI

    Starwhale 模型复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest 和 ^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 模型复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [全局选项] model diff [选项] <MODEL VERSION> <MODEL VERSION>

    model diff 比较同一模型的两个版本之间的差异。

    MODEL VERSION 是一个模型URI

    选项必填项类型默认值说明
    --show-detailsNBooleanFalse使用该选项输出详细的差异信息。

    swcli model extract

    swcli [全局选项] model extract [选项] <MODEL> <TARGET_DIR>

    model extract 能够对将Starwhale 模型解压到指定目录中,方便进行后续改造。

    MODEL 是一个模型URI

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,会强制覆盖目标目录已经存在的模型解压文件。

    Starwhale 模型解压的例子

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [全局选项] model history [选项] <MODEL>

    model history输出指定Starwhale模型的所有历史版本。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli model info

    swcli [全局选项] model info [选项] <MODEL>

    model info输出指定Starwhale模型版本的详细信息。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basic设置输出的过滤规则,比如只显示Model的model.yaml。目前该参数仅对Standalone Instance的Model生效。

    Starwhale 模型信息查看的例子

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [全局选项] model list [选项]

    model list显示所有的Starwhale模型。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的模型。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的模型数。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的模型。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value模型名称前缀--filter name=mnist
    ownerKey-Value模型所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli model recover

    swcli [全局选项] model recover [选项] <MODEL>

    model recover 恢复以前删除的 Starwhale 模型或版本。

    MODEL是一个模型URI。如果 URI 不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 模型或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的 Starwhale 模型或版本会被强制覆盖。

    swcli model remove

    swcli [全局选项] model remove [选项] <MODEL>

    model remove 删除指定的 Starwhale 模型或某个版本。

    MODEL 是一个模型URI。如果URI不包含版本,则删除指定模型的所有版本。

    被删除的 Starwhale 模型或版本可以在垃圾回收之前通过 swcli model recover 恢复。要永久删除某个Starwhale模型或版本,您可以使用 --force 选项。

    被删除的 Starwhale 模型或版本可以通过 swcli model list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale模型或版本。删除后不可恢复。

    swcli model run

    swcli [全局选项] model run [选项]

    model run 运行一个模型的 Handler。该命令提供两种模式: model URI模式和本地开发模式。 model URI模式需要一个预先构建好的模型包,本地开发模式仅需要 model 代码目录即可。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在model URI模式中使用,指定 model URI。
    --handler-hNString运行的Handler索引或名字,默认运行第一个Handler。格式为序号或Handler的名字。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model run 是非必须的。
    --run-project-pNString默认的 ProjectProject URI,表示 model run 的结果存储到对应的项目中。
    --dataset-dNStringDataset URI,模型运行所需要的 Starwhale 数据集。该参数可以被设置多次。
    --dataset-head-dhNInteger0在 Standalone Instance 下,用于调试目的,一般只需要运行数据集的一部分数据即可,可以通过 --dataset-head 参数来设定。默认值为0,表示会使用数据集所有数据。
    --in-containerNBooleanFalse使用docker镜像来运行模型。此选项仅适用于 Standalone 实例。Server 和 Cloud 实例始终使用 docker 镜像。如果指定的 runtime 是基于 docker 镜像构建的,此选项总是为真。
    --forbid-snapshot-fsNBooleanFalse当在model URI模式下,每次模型运行,都会使用一个全新的快照目录,设置该参数后直接使用模型的 workdir 目录作为运行目录。本地开发模式下,此参数不生效,每次运行都是在 --workdir 指定的目录中。
    -- --user-arbitrary-argsNString你在handlers中预设的参数 赋值.

    Starwhale 模型运行的例子

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    # --> run with dataset of head 10
    swcli model run --uri mnist --dataset-head 10 --dataset mnist

    swcli model serve

    swcli [全局选项] model serve [选项]

    model serve 命令可以以Web Server方式运行模型,并提供简易的 Web 交互界面。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在 model URI模式中使用,指定 model URI。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model serve 是非必须的。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --hostNString127.0.0.1服务监听的地址
    --portNInteger8080服务监听的端口

    Starwhale 模型 Serving 的例子

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [全局选项] model tag [选项] <MODEL> [TAGS]...

    model tag将标签附加到指定的Starwhale模型版本,同时支持删除和列出所有标签的功能。可以在模型URI中使用标签替代版本ID。

    MODEL是一个模型URI

    每个模型版本可以包含任意数量的标签,但同一模型中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的模型已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 模型标签的例子

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/project/index.html b/zh/next/reference/swcli/project/index.html index 2b4fe11f2..7e90e8303 100644 --- a/zh/next/reference/swcli/project/index.html +++ b/zh/next/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    swcli project

    Overview

    swcli [全局选项] project [选项] <子命令> [参数]...

    project命令包括以下子命令:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [全局选项] project create <PROJECT>

    project create 创建一个新的项目。

    PROJECT 是一个项目URI

    swcli project info

    swcli [全局选项] project info [选项] <PROJECT>

    project info 输出指定项目的详细信息。

    PROJECT 是一个项目URI

    swcli project list

    swcli [全局选项] project list [选项]

    project list 显示所有的项目。

    选项必填项类型默认值说明
    --instanceNString要显示的实例 URI。如果不指定该选项,则显示默认实例.
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的项目。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的项目数。仅限 Server 和 Cloud 实例。

    swcli project recover

    swcli [全局选项] project recover [选项] <PROJECT>

    project recover 恢复以前删除的项目。

    PROJECT 是一个项目URI

    已经被垃圾回收或者使用 --force 选项删除的项目无法使用本命令恢复。

    swcli project remove

    swcli [全局选项] project remove [选项] <PROJECT>

    project remove 删除指定的项目。

    PROJECT 是一个项目URI

    被删除的项目可以在垃圾回收之前通过 swcli project recover 恢复。要永久删除某个项目,您可以使用 --force 选项。

    被删除的项目可以通过 swcli project list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个项目。删除后不可恢复。

    swcli project use

    swcli [全局选项] project use <PROJECT>

    project use 将指定的项目设置为默认项目。如果要指定 Server/Cloud 实例上的项目,您需要先登录才能运行本命令。

    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/runtime/index.html b/zh/next/reference/swcli/runtime/index.html index be3e820cc..a8f38af28 100644 --- a/zh/next/reference/swcli/runtime/index.html +++ b/zh/next/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    swcli runtime

    概述

    swcli [全局选项] runtime [选项] <SUBCOMMAND> [参数]...

    runtime 命令包括以下子命令:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [全局选项] runtime activate [选项] <RUNTIME>

    runtime activate 根据指定的运行时创建一个全新的 Python 环境,类似 source venv/bin/activate 或 conda activate xxx 的效果。关闭当前 shell 或切换到其他 shell 后,需要重新激活 Runtime。URI 参数为 Runtime URI。

    对于已经激活的 Starwhale 运行时,如果想要退出该环境,需要在 venv 环境中执行 deactivate 命令或conda环境中执行 conda deactivate 命令。

    runtime activate 命令首次激活环境的时候,会根据 Starwhale 运行时的定义,构建一个 Python 隔离环境,并下载相关的 Python Packages ,可能会花费比较长的时间。

    swcli runtime build

    swcli [全局选项] runtime build [选项]

    runtime build 命令可以从多种环境或 runtime.yaml ,构建一个可以分享、可以复现的适合 ML/DL 领域的运行环境。

    参数说明

    • 运行时构建方式的相关参数:
    选项必填项类型默认值说明
    -c--condaNString通过 conda env name 寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -cp--conda-prefixNString通过 conda env prefix 路径寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -v--venvNString通过 venv 目录地址寻找对应的 venv 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -s--shellNString根据当前 shell 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -y--yamlNcwd 目录的 runtime.yaml根据用户自定义的 runtime.yaml 构建 Starwhale 运行时。
    -d--dockerNString将 docker image 作为 Starwhale 运行时。

    运行时构建方式的相关参数是互斥的,只能指定一种方式,如果不指定,则会采用 --yaml 方式读取 cwd 目录下的 runtime.yaml 文件进行 Starwhale 运行时的构建。

    • 其他参数:
    选项必填项作用域类型默认值说明
    --project-pN全局String默认项目项目URI
    -del--disable-env-lockNruntime.yaml 模式BooleanFalse是否安装 runtime.yaml 中的依赖,并锁定相关依赖的版本信息。默认会锁定依赖。
    -nc--no-cacheNruntime.yaml 模式BooleanFalse是否删除隔离环境,全新安装相关依赖。默认会在之前的隔离环境中安装依赖。
    --cudaNconda/venv/shell 模式Choice[11.3/11.4/11.5/11.6/11.7/]CUDA 版本,默认不使用 CUDA。
    --cudnnNconda/venv/shell 模式Choice[8/]cuDNN 版本,默认不使用 cuDNN。
    --archNconda/venv/shell 模式Choice[amd64/arm64/noarch]noarch体系结构
    -dpo--dump-pip-optionsN全局BooleanFalse~/.pip/pip.conf 导出 pip 的配置参数。
    -dcc--dump-condarcN全局BooleanFalse~/.condarc 导出 conda 的配置参数。
    -t--tagN全局String用户自定义标签,可以指定多次。

    Starwhale 运行时构建的例子

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [全局选项] runtime copy [选项] <SRC> <DEST>

    runtime copy 将 runtime 从 SRC 复制到 DEST,可以实现不同实例之间的运行时分享。这里 SRCDEST 都是运行时URI

    Starwhale 运行时复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest 和 ^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 运行时复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [全局选项] runtime dockerize [选项] <RUNTIME>

    runtime dockerize 基于指定的 runtime 创建一个 docker 镜像。Starwhale 使用 docker buildx 来创建镜像。运行此命令需要预先安装 Docker 19.03 以上的版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --tag or -tNStringDocker镜像的tag,该选项可以重复多次。
    --pushNBooleanFalse是否将创建的镜像推送到docker registry。
    --platformNStringamd64镜像的运行平台,可以是amd64或者arm64。该选项可以重复多次用于创建多平台镜像。
    --dry-runNBooleanFalse只生成 Dockerfile 不实际生成和推送镜像。

    swcli runtime extract

    swcli [全局选项] runtime extract [选项] <RUNTIME>

    Starwhale 运行时以压缩包的方式分发,使用 runtime extract 命令可以解压运行时 Package,然后进行后续的自定义修改。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果目标目录已经有解压好的 Starwhale 运行时,是否删除后重新解压。
    --target-dirNString自定义解压的目录,如果不指定则会放到 Starwhale 默认的运行时 workdir 目录中,命令输出日志中会提示。

    swcli runtime history

    swcli [全局选项] runtime history [选项] <RUNTIME>

    runtime history输出指定Starwhale运行时的所有历史版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli runtime info

    swcli [全局选项] runtime info [选项] <RUNTIME>

    runtime info输出指定Starwhale运行时版本的详细信息。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/runtime_yaml/manifest/lock/all]basic设置输出的过滤规则,比如只显示Runtime的runtime.yaml。目前该参数仅对Standalone Instance的Runtime生效。

    Starwhale 运行时查看详情的例子

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [全局选项] runtime list [选项]

    runtime list显示所有的 Starwhale 运行时。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的运行时。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的运行时数量。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的运行时。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value运行时名称前缀--filter name=pytorch
    ownerKey-Value运行时所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli runtime recover

    swcli [全局选项] runtime recover [选项] <RUNTIME>

    runtime recover 命令可以恢复以前删除的 Starwhale 运行时。

    RUNTIME是一个运行时URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 运行时或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale运行时或版本会被强制覆盖。

    swcli runtime remove

    swcli [全局选项] runtime remove [选项] <RUNTIME>

    runtime remove 命令可以删除指定的 Starwhale 运行时或某个版本。

    RUNTIME 是一个运行时URI。如果 URI 不包含版本,则删除所有版本。

    被删除的 Starwhale 运行时或版本可以在垃圾回收之前通过 swcli runtime recover 命令恢复。要永久删除某个 Starwhale 运行时或版本,您可以使用 --force 选项。

    被删除的 Starwhale 运行时或版本可以通过 swcli runtime list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 运行时或版本。删除后不可恢复。

    swcli runtime tag

    swcli [全局选项] runtime tag [选项] <RUNTIME> [TAGS]...

    runtime tag 命令将标签附加到指定的 Starwhale 运行时版本,同时支持删除和列出所有标签的功能。可以在运行时URI中使用标签替代版本 ID。

    RUNTIME 是一个运行时URI

    每个运行时版本可以包含任意数量的标签,但同一运行时中不允许有重复的标签名称。

    runtime tag仅适用于 Standalone 实例.

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的运行时已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 运行时标签的例子

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/zh/next/reference/swcli/utilities/index.html b/zh/next/reference/swcli/utilities/index.html index 50e9aa7e8..537231c3e 100644 --- a/zh/next/reference/swcli/utilities/index.html +++ b/zh/next/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    其他命令

    swcli gc

    swcli [全局选项] gc [选项]

    gc根据内部的垃圾回收策略清理已经被删除的项目、模型、数据集和运行时。

    选项必填项类型默认值说明
    --dry-runNBooleanFalse如果为真,仅输出将被删除的对象而不清理。
    --yesNBooleanFalse跳过所有需要确认的项目。

    swcli check

    swcli [全局选项] check

    检查 swcli 命令的外部依赖是否满足条件,目前主要检查 Docker 和 Conda。

    swcli completion install

    swcli [全局选项] completion install <SHELL_NAME>

    安装 swcli 命令补全,目前支持 bash, zsh 和 fish。如果不指定 SHELL_NAME,则尝试主动探测当前shell类型。

    swcli config edit

    swcli [全局选项] config edit

    编辑 Starwhale 配置文件,即 ~/.config/starwhale/config.yaml

    swcli ui

    swcli [全局选项] ui <INSTANCE>

    打开对应实例的Web页面。

    - + \ No newline at end of file diff --git a/zh/next/runtime/index.html b/zh/next/runtime/index.html index 5359f3081..46fd43e8d 100644 --- a/zh/next/runtime/index.html +++ b/zh/next/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 运行时

    overview

    概览

    Starwhale 运行时能够针对运行Python程序,提供一种可复现、可分享的运行环境。使用 Starwhale 运行时,可以非常容易的与他人分享,并且能在 Starwhale Server 和 Starwhale Cloud 实例上使用 Starwhale 运行时。

    Starwhale 运行时使用 venv, conda 和 docker 等基础技术,如果您当前正在使用这些技术,可以非常容易的将这个环境转化为 Starwhale 运行时。

    对于本地环境,Starwhale 运行时支持非常容易的多种环境管理和切换。Starwhale 运行时包含基础镜像和环境依赖两个部分。

    基础镜像

    Starwhale 基础镜像中会安装 Python, CUDA, cuDNN 和其他一些机器学习开发中必要的基础库。Starwhale 运行时提供多种基础镜像供选择,列表如下:

    • 体系结构:
      • X86 (amd64)
      • Arm (aarch64)
    • 操作系统:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7

    runtime.yaml 通过相关设置来决定使用何种基础镜像。

    - + \ No newline at end of file diff --git a/zh/next/runtime/yaml/index.html b/zh/next/runtime/yaml/index.html index 950f48a4a..1685455af 100644 --- a/zh/next/runtime/yaml/index.html +++ b/zh/next/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    runtime.yaml 使用指南

    runtime.yaml 是构建 Starwhale 运行时的描述文件,用户可以细粒度的定义 Starwhale 运行时的各种属性。当使用 swcli runtime build 命令中 yaml 模式时,需要提供 runtime.yaml 文件。

    使用示例

    最简示例

    dependencies:
    - pip:
    - numpy
    name: simple-test

    定义一个以 venv 作为Python 包隔离方式,安装numpy依赖的 Starwhale 运行时。

    llama2 示例

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    完整字段示例

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be install with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/zh/next/server/guides/server_admin/index.html b/zh/next/server/guides/server_admin/index.html index e99cbc482..eb772eba2 100644 --- a/zh/next/server/guides/server_admin/index.html +++ b/zh/next/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    版本:WIP

    Starwhale Server 系统设置

    超级管理员密码重置

    一旦您忘记了超级管理员的密码, 您可以通过下面的SQL语句将密码重置为 abcd1234

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    重置后,您可以使用上述密码登录到console。 然后再次修改密码为您想要的密码。

    系统设置

    您可以在 Starwhale Server Web 界面中对系统设置进行更改,目前支持runtime的docker镜像源修改以及资源池的划分等。下面是系统设置的一个例子:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    镜像源设置

    Server 下发的 Tasks 都是基于 docker 实现的,Starwhale Server 支持自定义镜像源,包括 dockerSetting.registryForPushdockerSetting.registryForPull

    资源池设置

    资源池实现了集群机器分组的功能。用户在创建任务时可以通过选择资源池将自己的任务下发到想要的机器组中。资源池可以理解为 Kubernetes 中的 nodeSelector,所以当您在K8S集群中给机器打上标签后,就可以在这里配置您的 resourcePool

    存储设置

    您可以通过存储设置来配置 Starwhale Server 可以访问哪些存储介质:

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload

    每一个 storageSetting 条目都应该有一个StorageAccessService接口的实现. Starwhale目前有四个内置的实现:

    • StorageAccessServiceAliyun 可以处理 type 为 aliyun 或者 oss 的条目
    • StorageAccessServiceMinio 可以处理 type 为 minio 的条目
    • StorageAccessServiceS3 可以处理 type 为 s3 的条目
    • StorageAccessServiceFile 可以处理 type 为 fs 或者 file 的条目

    不同的实现对 tokens 的要求是不一样的。当 type 为 aliyun、minio 或者 oss 的时候,endpoint 是必填的。当 endpoint 为空并且 type 为 s3 的时候,region 是必填的。而 fs/file 类型的存储则需要 rootDir 和 serviceProvider 作为 tokens 的 key。更多细节请参阅代码。

    - + \ No newline at end of file diff --git a/zh/next/server/index.html b/zh/next/server/index.html index 999e7a30a..d2cca51da 100644 --- a/zh/next/server/index.html +++ b/zh/next/server/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/next/server/installation/docker-compose/index.html b/zh/next/server/installation/docker-compose/index.html index 15528e108..30575e197 100644 --- a/zh/next/server/installation/docker-compose/index.html +++ b/zh/next/server/installation/docker-compose/index.html @@ -10,14 +10,14 @@ - +
    版本:WIP

    使用Docker Compose安装Starwhale

    先决条件

    安装方法

    启动服务

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    GLOBAL_IP 需要是可以被所有 swcli 访问到的,包括用户实际使用的swcli和container内部的swcli. 如果不能访问,请确认您的防火墙设置.

    compose.yaml 包含了Mysql数据库,MinIO存储和Controller服务. 创建一个 compose.override.yaml, 可以覆盖 compose.yaml 中的配置. 如何配置可以参考此处

    - + \ No newline at end of file diff --git a/zh/next/server/installation/docker/index.html b/zh/next/server/installation/docker/index.html index 8fdb14193..521b934e4 100644 --- a/zh/next/server/installation/docker/index.html +++ b/zh/next/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    使用 Docker 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。

    请确保您的Kubernetes集群上的pod可以访问Starwhale Server侦听的端口。

    为Docker准备env文件

    Starwhale Server可以通过环境变量进行配置。

    Docker的env文件模板参考此处。您可以通过修改模板来创建自己的env文件。

    准备kubeconfig文件[可选][SW_SCHEDULER=k8s]

    kubeconfig文件用于访问Kubernetes集群。 有关kubeconfig文件的更多信息,请参阅官方Kubernetes文档

    如果您安装了kubectl命令行工具,可以运行 kubectl config view 来查看您当前的配置。

    启动Docker镜像

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<您的kubeconfig文件路径>,destination=/root/.kube/config,readonly \
    --env-file <您的env文件路径> \
    docker-registry.starwhale.cn/star-whale/server:0.5.6

    对于非中国大陆网络用户,可以使用托管在 ghcr.io 上的镜像: ghcr.io/star-whale/server

    - + \ No newline at end of file diff --git a/zh/next/server/installation/index.html b/zh/next/server/installation/index.html index 773e3b08d..5c5172aca 100644 --- a/zh/next/server/installation/index.html +++ b/zh/next/server/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale Server 安装指南

    Starwhale Server 以 Docker 镜像的形式发布。您可以直接使用 Docker 运行,也可以部署到 Kubernetes 集群上。

    - + \ No newline at end of file diff --git a/zh/next/server/installation/k8s-cluster/index.html b/zh/next/server/installation/k8s-cluster/index.html index 80c0b9aec..240c29990 100644 --- a/zh/next/server/installation/k8s-cluster/index.html +++ b/zh/next/server/installation/k8s-cluster/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    在 Kubernetes 集群中安装 Starwhale

    在私有化场景中,可以使用 Helm 将 Starwhale Server 部署到一个 Kubernetes 集群中。Starwhale Server 依赖 MySQL 数据库和对象存储的这两个基础设施依赖:

    • 生产环境中,建议提供外部高可用的 MySQL 数据库和对象存储。
    • 试用或测试环境中,可以使用 Starwhale Charts 中自带单机版的 MySQL 和 MinIO。

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • Kubernetes Ingress 提供 HTTP(S) 路由。
    • Helm 3.2.0+。
    • [生产环境必需] MySQL 8.0以上版本的数据库实例用于存储元数据。
    • [生产环境必需] 兼容S3接口的对象存储,用于保存数据集、模型等。当前经过测试的对象存储服务列表:

    Helm Charts

    下载 Starwhale Helm chart

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    编写 values.yaml (生产环境必需)

    生产环境中,需要根据实际部署需要,设置包括 MySQL数据库,对象存储,域名和内存等参数,编写 values.yaml 是推荐的方式。下面是一个 values.yaml 的实例,供参考:

    # 设置镜像源,中国大陆网环境推荐 “docker-registry.starwhale.cn” 地址。其他网络环境可以忽略该设置项,会使用ghcr.io 镜像源: https://github.com/orgs/star-whale/packages 。
    image:
    registry: docker-registry.starwhale.cn
    org: star-whale

    # 生产环境中依赖的外部 MySQL 服务,MySQL 版本需要大于8.0
    externalMySQL:
    host: 10.0.1.100 # Kubernetes 集群中可以访问的数据库IP地址或域名
    port: 3306
    username: "your-username"
    password: "your-password"
    database: starwhale # 需要提前创建数据库,名字自由指定,默认字符集即可。上面指定的数据库用户需要对这个 dataset 有读写权限

    # 生产环境中依赖的外部 S3 协议的对象存储服务
    externalOSS:
    host: ks3-cn-beijing.ksyuncs.com # Kubernetes 集群和 Standalone 实例中同时可以访问的对象存储IP地址或域名
    port: 80
    accessKey: "your-ak"
    secretKey: "your-sk"
    defaultBuckets: starwhale # 需要提前创建Bucket,名字自由指定。上面指定的ak/sk 需要对这个 Bucket 有读写权限
    region: BEIJING # 对象存储对应的 region,默认为 local

    # 生产环境中指定了外部对象存储后,不需要自带的单机版本 MinIO
    minio:
    enabled: false

    # 生产环境中指定了外部 MySQL 后,不需要自带的单机版本 MySQL
    mysql:
    enabled: false

    controller:
    containerPort: 8082
    storageType: "ksyun" # 对象存储的类型 minio/s3/ksyun/baidu/tencent/aliyun
    ingress:
    enabled: true
    ingressClassName: nginx # 与 Kubernetes 集群中 Ingress Controller 对应
    host: server-domain-name # Server 外部访问的域名
    path: /

    # 生产环境中推荐 Starwhale Server 至少32G内存和8核CPU。
    resources:
    controller:
    limits:
    memory: 32G
    cpu: 8
    requests:
    memory: 32G
    cpu: 8

    # Server 中运行评测是需要下载 Starwhale Runtime 中定义的 Python Package,推荐设置符合实际网络环境的 PYPI mirror。后续也可以在 Server System Settings页面中修改。
    mirror:
    pypi:
    enabled: true
    indexUrl: "https://mirrors.aliyun.com/pypi/simple/"
    extraIndexUrl: "https://pypi.tuna.tsinghua.edu.cn/simple/"
    trustedHost: "mirrors.aliyun.com pypi.tuna.tsinghua.edu.cn"

    部署/更新 Starwhale Server

    首次部署或更新都可以使用如下命令,会自动创建一个 starwhale 的 kubernetes namespace。 values.custom.yaml 是根据实际集群的需要,编写的values.yaml。

    helm upgrade --devel --install starwhale starwhale/starwhale --namespace starwhale --create-namespace -f values.custom.yaml

    如果您安装了kubectl命令行工具,您可以运行 kubectl get pods -n starwhale 来检查是否所有 pod 都在正常运行中。

    卸载 Starwhale Server

    helm delete starwhale
    - + \ No newline at end of file diff --git a/zh/next/server/installation/minikube/index.html b/zh/next/server/installation/minikube/index.html index 38a77f0cf..9b87ac683 100644 --- a/zh/next/server/installation/minikube/index.html +++ b/zh/next/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    在 Minikube 中安装 Starwhale

    先决条件

    启动 Minikube

    minikube start --kubernetes-version=1.25.3 --image-repository=docker-registry.starwhale.cn/minikube --base-image=docker-registry.starwhale.cn/minikube/k8s-minikube/kicbase:v0.0.42

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,IngressController=ingress-nginx/controller:v1.9.4"

    目前 docker-registry.starwhale.cn/minikube 只缓存了 Kubernetes 1.25.3 的镜像,也可以使用阿里云提供的 Minikube 镜像:

    minikube start --image-mirror-country=cn

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=kube-webhook-certgen:v20231011-8b53cabe0,IngressController=nginx-ingress-controller:v1.9.4" --registries="KubeWebhookCertgenPatch=registry.cn-hangzhou.aliyuncs.com/google_containers,KubeWebhookCertgenCreate=registry.cn-hangzhou.aliyuncs.com/google_containers,IngressController=registry.cn-hangzhou.aliyuncs.com/google_containers"

    对于非中国大陆网络用户,可以使用如下命令:

    minikube start --addons ingress

    如果在您的机器上没有安装 kubectl,可以使用 Minikube 自带的 kubectl: minikube kubectl 或 bashrc中增加 alias kubectl="minikube kubectl --"

    安装 Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    对于非中国大陆网络用户,可以使用 values.minikube.global.yaml,命令如下:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    当成功安装后,会有类似如下的提示信息输出:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    检查 Starwhale Server 状态

    Minikube 方式启动 Starwhale Server 一般要用时3-5分钟,可以输出如下命令检查是否完成启动:

    kubectl get deployments -n starwhale
    NAMEREADYUP-TO-DATEAVAILABLEAGE
    controller1/1115m
    minio1/1115m
    mysql1/1115m

    本机访问的网络配置

    输出如下命令后,就可以在浏览器中通过 http://controller.starwhale.svc 访问 Starwhale Server:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    其他机器访问的网络配置

    • 步骤1: 在 Starwhale Server 所在机器上

      使用 socat 命令做临时的端口转发,命令如下:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      当您停掉socat进程后,端口转发会被禁止,其他机器的访问也会被禁止。如果想长期开启端口转发,可以使用 iptables 命令。

    • 步骤2: 在其他机器上

      在 hosts 文件添加相关域名映射,命令如下:

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/zh/next/server/installation/starwhale_env/index.html b/zh/next/server/installation/starwhale_env/index.html index a2fdf3130..193f98034 100644 --- a/zh/next/server/installation/starwhale_env/index.html +++ b/zh/next/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 环境变量文件示例

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################

    # 用于缓存WAL文件的目录。请将其指向一个有足够空间的挂载卷或主机路径。
    # 如果不设置,WAL文件将保存在docker运行时层,当容器重启时cache数据将丢失。
    SW_DATASTORE_WAL_LOCAL_CACHE_DIR=
    - + \ No newline at end of file diff --git a/zh/next/server/project/index.html b/zh/next/server/project/index.html index 047ac1d7b..f8e92d53d 100644 --- a/zh/next/server/project/index.html +++ b/zh/next/server/project/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    How to Organize and Manage Resources with Starwhale Projects

    Project is the basic unit for organizing and managing resources (such as models, datasets, runtime environments, etc.). You can create and manage projects based on your needs. For example, you can create projects by business team, product line, or models. One user can create and participate in one or more projects.

    Project type

    There are two types of projects:

    • Private project: The project (and related resources in the project) is only visible to project members with permission. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    • Public project: The project (and related resources in the project) is visible to all Starwhale users. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    Create a project

    1. Click the Create button in the upper right corner of the project list page;
    2. Enter a name for the project. Pay attention to avoiding duplicate names. For more information, please see Names in Starwhale
    3. Select the Project Type, which is defaulted to private project and can be selected as public according to needs;
    4. Fill in the description content;
    5. To finish, Click the Submit button.

    Edit a project

    The name, privacy and description of a project can be edited.

    1. Go to the project list page and find the project that needs to be edited by searching for the project name, then click the Edit Project button;
    2. Edit the items that need to be edited;
    3. Click Submit to save the edited content;
    4. If you're editing multiple projects, repeat steps 1 through 3.

    View a project

    My projects

    On the project list page, only my projects are displayed by default. My projects refer to the projects participated in by the current users as project members or project owners.

    Project sorting

    On the project list page, all projects are supported to be sorted by "Recently visited", "Project creation time from new to old", and "Project creation time from old to new", which can be selected according to your needs.

    Delete a project

    Once a project is deleted, all related resources (such as datasets, models, runtimes, evaluations, etc.) will be deleted and cannot be restored.

    1. Enter the project list page and search for the project name to find the project that needs to be deleted. Hover your mouse over the project you want to delete, then click the Delete button;
    2. Follow the prompts, enter the relevant information, click Confirm to delete the project, or click Cancel to cancel the deletion;
    3. If you are deleting multiple projects, repeat the above steps.

    Manage project member

    Only users with the admin role can assign people to the project. The project owner has the Owner role by default.

    Add a member

    1. Click Manage Members to go to the project member list page;
    2. Click the Add Member button in the upper right corner.
    3. Enter the Username you want to add, select a project role for the user in the project.
    4. Click submit to complete.
    5. If you're adding multiple members, repeat steps 1 through 4.

    Remove a member

    1. On the project list page or project overview tab, click Manage Members to go to the project member list page.
    2. Search for the username you want to delete, then click the Delete button.
    3. Click Yes to delete the user from this project, click No to cancel the deletion.
    4. If you're removing multiple members, repeat steps 1 through 3.

    Edit a member's role

    1. Hover your mouse over the project you want to edit, then click Manage Members to go to the project member list page.
    2. Find the username you want to adjust through searching, click the Project Role drop-down menu, and select a new project role. For more information on roles, please take a look at Roles and permissions in Starwhale.
    - + \ No newline at end of file diff --git a/zh/next/swcli/config/index.html b/zh/next/swcli/config/index.html index f4b21f6c4..7c70a89b3 100644 --- a/zh/next/swcli/config/index.html +++ b/zh/next/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    配置文件

    Standalone Instance 是安装在用户的笔记本或开发服务器上,以Linux/Mac用户为粒度进行隔离。用户通过 pip 命令安装 Starwhale Python package 并执行任意 swcli 命令后,就可以在 ~/.config/starwhale/config.yaml 中查看该用户的 Starwhale 配置。绝大多数情况下用户不需要手工修改 config.yaml 文件。

    ~/.config/starwhale/config.yaml 文件权限为 0o600,由于里面存有密钥信息,不建议用户修改该文件权限。您可以通过 swcli config edit 来修改配置:

    swcli config edit

    config.yaml 例子

    典型的 config.yaml 文件内容如下:

    • 当前默认默认 Instance 为 local。
    • cloud-cn/cloud-k8s/pre-k8s 三个为 Cloud Instance,local 为 Standalone Instance。
    • Standalone 本地存储的根目录为 /home/liutianwei/.starwhale
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml 字段说明

    参数说明类型默认值是否必须
    current_instance默认使用的instance名字,一般用 swcli instance select 命令设置Stringself
    instances管理的 Instances,包括 Standalone, Server 和 Cloud Instance,至少会有 Standalone Instance(名称为local),Server/Cloud Instance有一个或多个,一般用 swcli instance login 登陆一个新的instance,swcli instance logout 退出一个instanceDictStandalone Instance,名称为local
    instances.{instance-alias-name}.sw_token登陆Token,只对Server/Cloud Instance生效,后续swcli对Server/Cloud Instance进行操作时都会使用该Token。需要注意Token有过期时间,默认1个月,可以在Server/Cloud Instance中进行设置StringCloud-是,Standalone-否
    instances.{instance-alias-name}.typeinstance类型,目前只能填写 cloud 或 standaloneChoice[String]
    instances.{instance-alias-name}.uri对于Server/Cloud Instance,uri是http/https地址,对于Standalone Instance,uri是 localString
    instances.{instance-alias-name}.user_name用户名String
    instances.{instance-alias-name}.current_project当前Instance下默认的Project是什么,在URI的表述中会作为project字段进行默认填充,可以通过 swcli project select 命令进行设置String
    instances.{instance-alias-name}.user_role用户角色Stringnormal
    instances.{instance-alias-name}.updated_at该条Instance配置更新时间时间格式字符串
    storage与本地存储相关的设置Dict
    storage.rootStandalone Instance本地存储的根目录。通常情况下,当home目录空间不足,手工把数据文件移动到其他位置时,可以修改该字段String~/.starwhale
    versionconfig.yaml的版本,目前仅支持2.0String2.0

    Standalone Instance 的文件存储结构

    ${storage.root} 目录中存储了 Standalone Instance 所有的用户数据,包括 Project、Runtime、Model、Dataset、Evaluation 等用户直接感知的数据,也包括 ObjectStore、DataStore 等 Starwhale 后台实现的存储。具体说明如下:

    +-- ${storage.root}
    | +-- .objectstore --> 存储数据集chunk文件的简单存储,使用blake2b hash算法
    | | +-- blake2b --> hash算法名称
    | | | +-- 00 --> hash2位前缀
    | | | | +-- 0019ad58... --> object文件,文件名是文件内容的hash值
    | | | +-- 05
    | +-- .datastore --> 基于pyarrow的列式存储
    | | +-- project
    | | | +-- self --> 按照project名称进行分类存储
    | | | | +-- dataset --> 数据集相关的datastore存储,一般用来存储数据集的索引信息
    | | | | +-- eval --> 模型评测结果存储
    | +-- .recover --> 软删除某个project的存储目录,可以用 `swcli project recover` 进行恢复
    | +-- .tmp --> Dataset/Model/Runtime 构建过程中临时目录
    | +-- myproject --> 用户创建的project,所有myproject信息都存储在该目录
    | +-- self --> Standalone Instance自动创建的project
    | | +-- dataset --> swds数据集存储目录
    | | +-- evaluation --> 模型评测配置文件、日志等存储目录
    | | +-- model --> swmp模型包存储目录
    | | +-- runtime --> swrt环境包存储目录
    | | +-- workdir --> 解压、复原包文件的目录
    | | | +-- model --> swmp解压后的目录
    | | | +-- runtime --> swrt解压后的目录,若进行runtime restore操作,生成的venv或conda隔离环境,也会存放在该目录中

    有时候您可能需要用到 starwhale.Link 来存储一些信息。理论上,Link里面的URI可以是任意的合法 URI(星鲸目前只支持S3协议族和HTTP),比如s3://10.131.0.1:9000/users/path。然而,有些 Link是需要鉴权才能访问的。 link_auths 就是用来存放这些鉴权信息的。

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    link_auths 里面的每一条都会自动匹配您的URI。 目前 S3 类型的鉴权信息通过 bucket 和 endpoint 来匹配 URI。

    - + \ No newline at end of file diff --git a/zh/next/swcli/index.html b/zh/next/swcli/index.html index a2865d70e..583bd434c 100644 --- a/zh/next/swcli/index.html +++ b/zh/next/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale Client (swcli) 用户指南

    swcli 是一个命令行工具,可让您与 Starwhale 实例进行交互。您可以使用 swcli 完成 Starwhale 中几乎所有的任务。swcli 是用纯 Python3 编写的(需要 Python 3.7 ~ 3.11),因此可以通过 pip 命令轻松安装。目前,swcli 仅支持 Linux 和 macOS,Windows版本即将推出。

    - + \ No newline at end of file diff --git a/zh/next/swcli/installation/index.html b/zh/next/swcli/installation/index.html index ff1b5d000..e05939fd4 100644 --- a/zh/next/swcli/installation/index.html +++ b/zh/next/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    安装指南

    swcli 命令行工具能够对各种实例完成几乎所有的操作,由于是由纯 Python3 编写,可以使用 pip 命令完成安装,本文会提供一些安装建议,帮助您获得一个干净的、无依赖冲突的 swcli Python 环境。

    安装建议

    非常不建议将 Starwhale 安装在系统的全局 Python 环境中,可能会导致 Python 的依赖冲突问题。使用 venv 或 conda 创建一个隔离的 Python 环境,并在其中安装 Starwhale,是 Python 推荐的做法。

    先决条件

    • Python3.7 ~ 3.11
    • Linux 或 macOS
    • Conda(可选)

    在Ubuntu系统中,可以运行以下命令:

    sudo apt-get install python3 python3-venv python3-pip

    #如果您想安装多个python版本
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli 可以在 macOS 下工作,包括 arm(M1 Chip) 和 x86(Intel Chip) 两种体系结构。但 macOS 下自带的 Python3 可能会遇到一些 Python 自身的问题,推荐使用 homebrew 进行安装:

    brew install python3

    安装 swcli

    使用venv安装

    venv 环境即可以使用 Python3 自带的 venv,也可以使用 virtualenv 工具。

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    使用conda安装

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    👏 现在,您可以在全局环境中使用 swcli 了。

    swcli 的特定场景依赖安装

    # 针对Audio处理, 主要包含soundfile库等
    python -m pip install starwhale[audio]

    # 针对Image处理,主要包含pillow库等
    python -m pip install starwhale[pillow]

    # 针对swcli model server命令
    python -m pip install starwhale[server]

    # 针对内建的Online Serving
    python -m pip install starwhale[online-serve]

    # 安装全部依赖
    python -m pip install starwhale[all]

    更新 swcli

    #适用于venv环境
    python3 -m pip install --upgrade starwhale

    #适用于conda环境
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    卸载swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/zh/next/swcli/swignore/index.html b/zh/next/swcli/swignore/index.html index c22401fcc..eaf0894ba 100644 --- a/zh/next/swcli/swignore/index.html +++ b/zh/next/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    关于 .swignore 文件

    .swignore 文件与 .gitignore, .dockerignore 等文件类似,都是用来定义忽略某些文件或文件夹。.swignore 文件主要应用在 Starwhale 的模型构建过程中。默认情况下,swcli model build 命令 或 starwhale.model.build() Python SDK会遍历指定目录下的所有文件,并自动排除一些已知的、不适合放入模型包中的文件或目录。

    文件格式

    • swignore文件中的每一行指定一个匹配文件和目录的模式。
    • 空行不匹配任何文件,因此它可以作为可读性的分隔符。
    • 星号*匹配除斜杠以外的任何内容。
    • #开头的行作为注释。
    • 支持wildcard的表达,类似 *.jpg, *.png

    默认下自动排除的文件或目录

    如果不想排除这些文件,可以构建模型 (swcli model build 命令) 的时候增加 --add-all 参数。

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv安装目录
    • conda安装目录

    例子

    这是MNIST示例中使用的.swignore文件:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/zh/next/swcli/uri/index.html b/zh/next/swcli/uri/index.html index a44f1468d..f5db4ae87 100644 --- a/zh/next/swcli/uri/index.html +++ b/zh/next/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    版本:WIP

    Starwhale 资源URI

    提示

    资源 URI 在 Starwhale Client 中被广泛使用。URI 可以引用本地实例中的资源或远程实例中的任何其他资源。 这样 Starwhale Client 就可以轻松操作任何资源。

    concepts-org.jpg

    实例URI

    实例 URI 可以是以下形式之一:

    • local: 指本地的 Standalone 实例.
    • [http(s)://]<hostname or ip>[:<port>]:指向一个 Starwhale Cloud 实例。
    • [cloud://]<cloud alias>:Server或Cloud的实例别名,可以在实例登录阶段配置。
    警告

    “local”不同于“localhost”,前者为 Standalone 实例,而后者是一个 URL ,指向本地运行的 Starwhale Server 实例。

    例子:

    # 登录Starwhale Cloud,别名为swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.cn --alias swcloud

    # 将模型从本地实例复制到云实例
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # 将运行时复制到Starwhale Server实例:http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    项目URI

    项目URI的格式为“[<实例URI>/project/]<project name>”。 如果未指定实例 URI,则使用当前实例。

    例子:

    swcli project select self   # 选择当前实例中的self项目
    swcli project info local/project/self # 查看本地实例中的self项目信息

    模型/数据集/运行时URI

    • 模型URI: [<项目URI>/model/]<model name>[/version/<version id|tag>].
    • 数据集URI: [<项目URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • 运行时URI: [<项目URI>/runtime/]<runtime name>[/version/<version id|tag>].
    提示
    • swcli 支持更加人性化的短版本ID。您可以只键入版本ID的前几个字符,前提是它至少有四个字符长且唯一指向某个版本ID。但是,recover 命令必须使用完整的版本ID。
    • 如果未指定项目URI,将使用默认项目
    • 您始终可以使用版本标签而不是版本ID。

    例子:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # 检查模型信息,模型名称:mnist,版本:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # 使用短版本ID
    swcli model info mnist # 检查mnist模型信息
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # 使用latest的默认tag

    作业URI

    • 格式: [<项目URI>/job/]<job id>.
    • 如果未指定项目URI,将使用默认项目。

    例子:

    swcli job info mezdayjzge3w   # 查看默认实例和默认项目中的mezdayjzge3w版本
    swcli job info local/project/self/job/mezday # 检查本地实例,self项目,作业id:mezday

    默认实例

    当项目URI中的实例部分被省略时,将使用默认实例进行替代。默认实例是由 swcli instance login 或 swcli instance use 指定的。

    默认项目

    当模型/数据集/运行时/评估URI的项目部分被省略时,将使用默认项目。默认项目是指通过 swcli project use 命令选择的项目。

    - + \ No newline at end of file diff --git a/zh/reference/sdk/dataset/index.html b/zh/reference/sdk/dataset/index.html index cd198b6f8..1c3626eb8 100644 --- a/zh/reference/sdk/dataset/index.html +++ b/zh/reference/sdk/dataset/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 数据集 SDK

    dataset

    获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。

    @classmethod
    def dataset(
    cls,
    uri: t.Union[str, Resource],
    create: str = _DatasetCreateMode.auto,
    readonly: bool = False,
    ) -> Dataset:

    参数

    • uri: (str 或 Resource, required)
      • Dataset URI 格式的字符串或 Resource 对象。
    • create: (str, optional)
      • 数据集创建模式,包括 auto, empty 和 forbid 三种方式。
        • auto 模式: 如果数据集已经存在,不会自动创建数据集;如果数据集不存在,则自动创建数据集。
        • empty 模式: 如果数据集已经存在,则抛出异常;如果数据集不存在,则自动创建数据集。
        • forbid 模式: 如果数据集已经存在,则不做任何事情;如果数据集不存在,则抛出异常。forbid 模式能确保数据集存在。
      • auto 模式是默认值。
    • readonly: (bool, optional)
      • 对于已经存在的数据集,可以指定 readonly=True 保证数据集以只读方式加载。
      • 默认值为 False

    使用示例

    from starwhale import dataset, Image

    # create a new dataset named mnist, and add a row into the dataset
    # dataset("mnist") is equal to dataset("mnist", create="auto")
    ds = dataset("mnist")
    ds.exists() # return False, "mnist" dataset is not existing.
    ds.append({"img": Image(), "label": 1})
    ds.commit()
    ds.close()

    # load a cloud instance dataset in readonly mode
    ds = dataset("cloud://remote-instance/project/starwhale/dataset/mnist", readonly=True)
    labels = [row.features.label for row in ds]
    ds.close()

    # load a read/write dataset with a specified version
    ds = dataset("mnist/version/mrrdczdbmzsw")
    ds[0].features.label = 1
    ds.commit()
    ds.close()

    # create an empty dataset
    ds = dataset("mnist-empty", create="empty")

    # ensure the dataset existence
    ds = dataset("mnist-existed", create="forbid")

    class starwhale.Dataset

    starwhale.Dataset 实现 Starwhale 数据集的抽象,能够对Standalone/Server/Cloud 实例上的数据集进行操作。

    from_huggingface

    from_huggingface 是一个 classmethod 方法,能够将 Huggingface 上的数据集转化为 Starwhale 数据集。

    def from_huggingface(
    cls,
    name: str,
    repo: str,
    subset: str | None = None,
    split: str | None = None,
    revision: str = "main",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    cache: bool = True,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称。
    • repo: (str, required)
    • subset: (str, optional)
      • Huggingface的数据集 subset 名称,如果HF数据集有多个 subsets, 您务必要指定一个 subset。
    • split: (str, optional)
      • Huggingface的数据集中 Split 名称。如果没有指定 split,则数据集中所有的 splits 数据都会被构建。
    • revision: (str, optional)
      • Huggingface的数据集版本,默认是 main,即main分支的最新一次提交。参数接受branch, tag 或 commit hash。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • cache: (bool, optional)
      • 是否使用 Huggingface 的本地缓存。
      • 默认使用缓存。
      • 缓存 = 下载文件缓存 + 本地Huggingface 数据集缓存。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_huggingface("mnist", "mnist")
    print(myds[0])
    from starwhale import Dataset
    myds = Dataset.from_huggingface("mmlu", "cais/mmlu", subset="anatomy", split="auxiliary_train", revision="7456cfb")

    from_json

    from_json 是一个 classmethod 方法,能够将 json 字符串转化为 Starwhale 数据集。

    @classmethod
    def from_json(
    cls,
    name: str,
    json_text: str,
    field_selector: str = "",
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • name: (str, required)
      • 数据集名称
    • json_text: (str, required)
      • json 字符串,from_json 函数会序列化该字符串为 Python 对象,然后开始构建 Starwhale 数据集。
    • field_selector: (str, optional)
      • 可以提取 json_text 中特定的 array 结构。
      • 默认从 json 的根提取数据。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例

    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    print(myds[0].features.en)
    from starwhale import Dataset
    myds = Dataset.from_json(
    name="translation",
    json_text='{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}',
    field_selector="content.child_content"
    )
    print(myds[0].features["zh-cn"])

    from_folder

    from_folder 是一个 classmethod 方法,能够读取指定目录中的 Image/Video/Audio 数据,并将其自动转化为 Starwhale 数据集。该函数支持如下特性:

    • 能够递归的搜索目标目录及子目录
    • 支持三种类型的文件提取:
      • image: 支持 png/jpg/jpeg/webp/svg/apng 图片类型。图片文件会被转化为 Starwhale.Image 类型。
      • video: 支持 mp4/webm/avi 视频类型。视频文件会被转化为 Starwhale.Video 类型。
      • audio: 支持 mp3/wav 音频类型。音频文件会被转化为 Starwhale.Audio 类型。
    • 每个文件对应数据集的一条记录,文件对应的数据集字段名称为 file
    • auto_label=True,则会使用父目录的名称作为该条数据的标签,对应 label 字段。根目录下的文件,则不会被打标签。
    • 若存在与 image/video/audio 同名的 txt 文件,则该文件内容会被作为 caption 字段内容存放到数据集中。
    • 若根目录存在 metadata.csv 或 metadata.jsonl 文件,则会自动读取文件的内容,并将其通过文件路径名作为关联,存入数据集中,可以用来指定 meta 信息。
      • metadata.csv 和 metadata.jsonl 文件是互斥的,当都存在的时候,程序会抛出异常。
      • metadata.csv 和 metadata.jsonl 每行记录中需要包含 file_name 字段,指向对应文件的路径。
      • metadata.csv 和 metadata.jsonl 对于数据集构建是可选的。
    @classmethod
    def from_folder(
    cls,
    folder: str | Path,
    kind: str | DatasetFolderSourceType,
    name: str | Resource = "",
    auto_label: bool = True,
    alignment_size: int | str = D_ALIGNMENT_SIZE,
    volume_size: int | str = D_FILE_VOLUME_SIZE,
    mode: DatasetChangeMode | str = DatasetChangeMode.PATCH,
    tags: t.List[str] | None = None,
    ) -> Dataset:

    参数

    • folder: (str|Path, required)
      • 文件夹路径
    • kind: (str|DatasetFolderSourceType, required)
      • 数据类型设置,目前支持 image, video 和 audio 三种类型。
      • 会根据设置的 kind 值,在 folder 中递归寻找对应类型的文件。其他类型文件会被忽略掉。
    • name: (str|Resource, optional)
      • 数据集名称。
      • 若不指定,则使用目录名称作为数据集名称。
    • auto_label: (bool, optional)
      • 是否根据父目录的名字自动对每条记录打标签。
      • 默认为 True
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的尺寸。
      • 默认值为 128,即128个字节对齐。
    • volume_size: (int|str, optional)
      • 数据集 blob 文件的最大尺寸,超过后会产生一个新的 blob 文件。
      • 默认值为 64MB,即每个blob文件64MB大小。
    • mode: (str|DatasetChangeMode, optional)
      • 数据集更新的模式,包括 patch 模式和 overwrite 模式。
      • 默认为 patch 模式。
    • tags: (List[str], optional)
      • 用户自定义的数据集标签。

    使用示例 ${folder-example}

    • 函数调用示例

      from starwhale import Dataset

      # create a my-image-dataset dataset from /path/to/image folder.
      ds = Dataset.from_folder(
      folder="/path/to/image",
      kind="image",
      name="my-image-dataset"
      )
    • caption 示例

      folder/dog/1.png
      folder/dog/1.txt

      1.txt 中的内容,会填充到 1.png 所在行中 caption 字段中。

    • metadata.csv 和 metadata.jsonl 示例

      metadata.csv 内容:

      file_name, caption
      1.png, dog
      2.png, cat

      metadata.jsonl 内容:

      {"file_name": "1.png", "caption": "dog"}
      {"file_name": "2.png", "caption": "cat"}
    • 自动 label 示例

      folder/dog/1.png
      folder/cat/2.png
      folder/dog/3.png
      folder/cat/4.png

      生成的数据集中包含四条数据,分为 dog 和 cat 两类。

    __iter__

    __iter__ 是一个 method 方法,能够对数据集进行迭代。

    from starwhale import dataset

    ds = dataset("mnist")

    for item in ds:
    print(item.index)
    print(item.features.label) # label 和 img 是 mnist数据集中的数据列
    print(item.features.img)

    batch_iter

    batch_iter 是一个 method 方法,能够批量地进行数据集迭代。

    def batch_iter(
    self, batch_size: int = 1, drop_not_full: bool = False
    ) -> t.Iterator[t.List[DataRow]]:

    参数

    • batch_size: (int, optional)
      • batch的大小,默认值为1。
    • drop_not_full: (bool, optional)
      • 最后一组batch数据数量小于 batch_size 时,该组数据是否会被抛弃掉。
      • 默认是不抛弃。

    使用示例

    from starwhale import dataset

    ds = dataset("mnist")
    for batch_rows in ds.batch_iter(batch_size=2):
    assert len(batch_rows) == 2
    print(batch_rows[0].features)

    __getitem__

    __getitem__ 是一个 method 方法,能提供数据集中某些行数据的获取,操作方式类似 Python 的 dict 和 list 类型。

    from starwhale import dataset

    ds = dataset("mock-str-index")

    # if the index type is string
    ds["str_key"] # get the DataRow by the "str_key" string key
    ds["start":"end"] # get a slice of the dataset by the range ("start", "end")

    ds = dataset("mock-int-index")
    # if the index type is int
    ds[1] # get the DataRow by the 1 int key
    ds[1:10:2] # get a slice of the dataset by the range (1, 10), step is 2

    __setitem__

    __setitem__ 是一个 method 方法,能提供数据集中行数据的更新,操作方式类似 Python 的 dict 类型。__setitem__ 支持多线程并行插入数据。

    def __setitem__(
    self, key: t.Union[str, int], value: t.Union[DataRow, t.Tuple, t.Dict]
    ) -> None:

    参数

    • key: (int|str, required)
      • key 即为数据集中每行的 index,类型为 int 或 str,一个数据集中只接受一种类型。
    • value: (DataRow|tuple|dict, required)
      • value 即为数据集中每行的 features,一般建议用 Python 的 dict 类型。

    使用示例

    • 插入数据

    test 数据中插入两条数据,index分别为 testtest2

    from starwhale import dataset

    with dataset("test") as ds:
    ds["test"] = {"txt": "abc", "int": 1}
    ds["test2"] = {"txt": "bcd", "int": 2}
    ds.commit()
    • 并行插入数据
    from starwhale import dataset, Binary
    from concurrent.futures import as_completed, ThreadPoolExecutor

    ds = dataset("test")

    def _do_append(_start: int) -> None:
    for i in range(_start, 100):
    ds.append((i, {"data": Binary(), "label": i}))

    pool = ThreadPoolExecutor(max_workers=10)
    tasks = [pool.submit(_do_append, i * 10) for i in range(0, 9)]
    for task in as_completed(tasks):
    task.result()

    ds.commit()
    ds.close()

    __delitem__

    __delitem__ 是一个 method 方法,用来删除数据集中的某些行数据。

    def __delitem__(self, key: _ItemType) -> None:
    from starwhale import dataset

    ds = dataset("existed-ds")
    del ds[6:9]
    del ds[0]
    ds.commit()
    ds.close()

    append

    append 是一个 method 方法,用来向数据集中添加数据,类似 Python list 的 append 函数。

    • 添加 features dict,每行数据自动 index 为 int 类型,从0开始自增。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append({"label": i, "image": Image(f"folder/{i}.png")})
      ds.commit()
    • 添加 index + features dict,数据集中每行数据的 index 不会被自动处理。

      from starwhale import dataset, Image

      with dataset("new-ds") as ds:
      for i in range(0, 100):
      ds.append((f"index-{i}", {"label": i, "image": Image(f"folder/{i}.png")}))

      ds.commit()

    extend

    extend 是一个 method 方法,用来向数据集中批量添加数据,类似 Python list 的 extend 函数。

    from starwhale import dataset, Text

    ds = dataset("new-ds")
    ds.extend([
    (f"label-{i}", {"text": Text(), "label": i}) for i in range(0, 10)
    ])
    ds.commit()
    ds.close()

    commit

    commit 是一个 method 方法,调用 commit 时会将当前缓存中数据 flush 到存储中,并产生一个数据集版本,后续可以用这个版本信息加载相应的数据集内容。

    对于一个数据集,如果添加一些数据后,并没有调用 commit 方法,而是直接调用 close 或退出进程,那么这些数据依旧会写入到数据集中,只是没有生成一个新的版本。

    @_check_readonly
    def commit(
    self,
    tags: t.Optional[t.List[str]] = None,
    message: str = "",
    force_add_tags: bool = False,
    ignore_add_tags_errors: bool = False,
    ) -> str:

    参数

    • tags: (List(str), optional)
      • 指定 tags,可以指定多个tag。
    • message: (str, optional)
      • 提交信息,默认为空。
    • force_add_tags: (bool, optional)
      • 当给该版本添加标签时,对于 server/cloud 实例,若标签已经被应用到其他数据集版本时,可以使用 force_add_tags=True 参数强制将标签添加到此版本上,否则会抛出异常。
      • 默认为 False
    • ignore_add_tags_errors: (bool, optional)
      • 忽略添加标签时抛出的异常。
      • 默认为 False

    使用示例

    from starwhale import dataset
    with dataset("mnist") as ds:
    ds.append({"label": 1})
    ds.commit(message="init commit")

    readonly

    readonly 是一个 property 属性,表示数据集是否只读,返回值为 bool 类型。

    from starwhale import dataset
    ds = dataset("mnist", readonly=True)
    assert ds.readonly

    loading_version

    loading_version 是一个 property 属性,字符串类型。

    • 当加载一个已经存在的数据集时,返回的是数据集加载的对应版本。
    • 当加载一个不存在的数据集时,返回的是 pending_commit_version 。

    pending_commit_version

    pending_commit_version 是一个 property 属性,字符串类型,表示待提交的版本信息。当调用 commit 方法后,pending_commit_version 会变成 committed_version

    committed_version

    committed_version 是一个 property 属性,字符串类型,表示已经调用 commit 方法后生成的版本信息。当没有调用 commit 方法时,访问该属性时程序会抛出异常。

    remove

    remove 是一个 method 方法,等价于 swcli dataset remove 命令,能够删除数据集。

    def remove(self, force: bool = False) -> None:

    recover

    recover 是一个 method 方法,等价于 swcli dataset recover 命令,能够对软删除且未GC的数据集进行恢复。

    def recover(self, force: bool = False) -> None:

    summary

    summary 是一个 method 方法,等价于 swcli dataset summary 命令,返回数据集摘要信息。

    def summary(self) -> t.Optional[DatasetSummary]:

    history

    history 是一个 method 方法,等价于 swcli dataset history 命令,返回数据集的历史记录。

    def history(self) -> t.List[t.Dict]:

    flush

    flush 是一个 method 方法,能够将内存中暂存的数据刷到持久化存储中。commit 和 close 方法会自动调用 flush 。

    close

    close 是一个 method 方法,关闭已经打开的数据集相关链接。Dataset 也实现了 contextmanager,使用 with 语法后可以自动关闭数据集,不需要主动调用 close 方法。

    from starwhale import dataset

    ds = dataset("mnist")
    ds.close()

    with dataset("mnist") as ds:
    print(ds[0])

    head

    head 是一个 method 方法,能够显示数据集前n行数据,等价于 swcli dataset head 命令。

    def head(self, n: int = 5, skip_fetch_data: bool = False) -> t.List[DataRow]:

    fetch_one

    fetch_one 是一个 method 方法,获得数据集的第一条记录,相当于 head(n=1)[0]

    list

    list 是一个 classmethod 方法,能够列出项目 URI 下的 Starwhale 数据集,等价于 swcli dataset list 命令。

    @classmethod
    def list(
    cls,
    project_uri: t.Union[str, Project] = "",
    fullname: bool = False,
    show_removed: bool = False,
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> Tuple[DatasetListType, Dict[str, Any]]:

    copy

    copy 是一个 method 方法,能够复制数据到其他实例上,等价于 swcli dataset copy 命令。

    def copy(
    self,
    dest_uri: str,
    dest_local_project_uri: str = "",
    force: bool = False,
    mode: str = DatasetChangeMode.PATCH.value,
    ignore_tags: t.List[str] | None = None,
    ) -> None:

    参数

    • dest_uri: (str, required)
      • Dataset URI
    • dest_local_project_uri: (str, optional)
      • 从远端复制到本地 Standalone 实例时,可以指定对应的项目 URI。
    • force: (bool, optional)
      • 当目标实例上已经有相同版本的数据集时,是否强制覆盖。
      • 默认不覆盖。
      • 当复制标签到远端 Server/Cloud 实例时,若标签已经被其他版本使用,使用 force=True 参数可以强制变更标签到本版本上。
    • mode: (str, optional)
      • 数据集复制模式,分为 patch 模式 和 overwrite 模式,默认为 patch
      • patch: 使用补丁方式更新数据集,只更新计划变更的行和列,在新生成的版本中仍能读取到未受影响的行和列。
      • overwrite: 使用覆盖方式更新数据集,会将原来的所有行都删除,然后再进行更新,在新生成的版本中读取不到老数据。但请放心,删除的数据依旧可以通过旧版本进行访问。
    • ignore_tags (List[str], optional)
      • 复制数据集时,可以忽略的自定义标签。
      • 默认会复制所有用户自定义标签到其他实例中。
      • 复制标签会忽略 latest 和 ^v\d+$ 内建标签。

    使用示例

    from starwhale import dataset
    ds = dataset("mnist")
    ds.copy("cloud://remote-instance/project/starwhale")

    to_pytorch

    to_pytorch 是一个 method 方法,能够将 Starwhale 数据集转化为 Pytorch 的 torch.utils.data.Dataset 类型,可以进一步传给 torch.utils.data.DataLoader 进行使用。

    需要注意的是,to_pytorch 函数返回的是 Pytorch 的 IterableDataset

    def to_pytorch(
    self,
    transform: t.Optional[t.Callable] = None,
    drop_index: bool = True,
    skip_default_transform: bool = False,
    ) -> torch.utils.data.Dataset:

    参数

    • transform: (callable, optional)
      • 支持用户自定义变换函数,能够按需转化数据类型。
    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。
    • skip_default_transform: (bool, optional)
      • 如果没有设置 transform, 默认状态下会使用 Starwhale 内建的 transform 函数,对数据进行转化,可以通过 skip_default_transform 参数禁用内建数据转化。

    使用示例

    import torch.utils.data as tdata
    from starwhale import dataset

    ds = dataset("mnist")

    torch_ds = ds.to_pytorch()
    torch_loader = tdata.DataLoader(torch_ds, batch_size=2)
    import torch.utils.data as tdata
    from starwhale import dataset

    with dataset("mnist") as ds:
    for i in range(0, 10):
    ds.append({"txt": Text(f"data-{i}"), "label": i})

    ds.commit()

    def _custom_transform(data: t.Any) -> t.Any:
    data = data.copy()
    txt = data["txt"].to_str()
    data["txt"] = f"custom-{txt}"
    return data

    torch_loader = tdata.DataLoader(
    dataset(ds.uri).to_pytorch(transform=_custom_transform), batch_size=1
    )
    item = next(iter(torch_loader))
    assert isinstance(item["label"], torch.Tensor)
    assert item["txt"][0] in ("custom-data-0", "custom-data-1")

    to_tensorflow

    to_tensorflow 是一个 method 方法,能够将 Starwhale 数据集转化为 Tensorflow 的 tensorflow.data.Dataset 类型。

    def to_tensorflow(self, drop_index: bool = True) -> tensorflow.data.Dataset:

    参数

    • drop_index: (bool, optional)
      • 是否抛弃掉 index 字段。

    使用示例

    from starwhale import dataset
    import tensorflow as tf

    ds = dataset("mnist")
    tf_ds = ds.to_tensorflow(drop_index=True)
    assert isinstance(tf_ds, tf.data.Dataset)

    with_builder_blob_config

    with_builder_blob_config 是一个 method 方法,用来设置 Starwhale 数据集中 blob 的相关属性信息。需要在变更数据之前调用。

    def with_builder_blob_config(
    self,
    volume_size: int | str | None = D_FILE_VOLUME_SIZE,
    alignment_size: int | str | None = D_ALIGNMENT_SIZE,
    ) -> Dataset:

    参数

    • volume_size: (int|str, optional)
      • 单个数据集 blob 文件的大小。
      • 默认值为 64MB。
      • 当类型为 int 时,单位为 Bytes。
      • 当类型为 str 时,格式类似 1GB, 64MB。
    • alignment_size: (int|str, optional)
      • 数据集 blob 文件中数据对齐的大小
      • 默认值为 128个字节。
      • 与 volume_size 一样的类型解析。

    使用示例

    from starwhale import dataset, Binary

    ds = dataset("mnist").with_builder_blob_config(volume_size="32M", alignment_size=128)
    ds.append({"data": Binary(b"123")})
    ds.commit()
    ds.close()

    with_loader_config

    with_loader_config 是一个 method 方法,用来设置 Starwhale 数据集 loader 的过程参数。

    def with_loader_config(
    self,
    num_workers: t.Optional[int] = None,
    cache_size: t.Optional[int] = None,
    field_transformer: t.Optional[t.Dict] = None,
    ) -> Dataset:

    参数

    • num_workers: (int, optional)
      • 加载数据集的 worker 数目,默认为2。
    • cache_size: (int, optional)
      • 预加载的数据的数量,默认为20条。
    • field_transformer: (dict, optional)
      • features 字段名称的变换。

    使用示例

    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation",
    '[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]'
    )
    myds = dataset("translation").with_loader_config(field_transformer={"en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["en"]
    from starwhale import Dataset, dataset
    Dataset.from_json(
    "translation2",
    '[{"content":{"child_content":[{"en":"hello","zh-cn":"你好"},{"en":"how are you","zh-cn":"最近怎么样"}]}}]'
    )
    myds = dataset("translation2").with_loader_config(field_transformer={"content.child_content[0].en": "en-us"})
    assert myds[0].features["en-us"] == myds[0].features["content"]["child_content"][0]["en"]
    - + \ No newline at end of file diff --git a/zh/reference/sdk/evaluation/index.html b/zh/reference/sdk/evaluation/index.html index 29f7579a3..4d9a72463 100644 --- a/zh/reference/sdk/evaluation/index.html +++ b/zh/reference/sdk/evaluation/index.html @@ -10,7 +10,7 @@ - + @@ -18,7 +18,7 @@
    版本:0.6.4

    Starwhale 模型评测 SDK

    @evaluation.predict

    @evaluation.predict 是一个修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取本地或远端的数据集,将数据集中的数据以单条或批量的方式,传递给 evaluation.predict 修饰的函数。
    • 通过多副本的设置,实现分布式数据集消费的功能,能以水平扩展的方式缩短模型评测任务的用时。
    • 自动将函数返回值和数据集的输入 features 存储到 results 表中,方便Web UI展示和进一步的 evaluate 阶段使用。
    • 每单条或每批量组数据会调用一次被修饰的函数,完成推理过程。

    控制参数

    • resources: (dict, optional)
      • 定义 predict 每个任务在 Server 实例上运行时所需要的资源,包括 memory、cpu、nvidia.com/gpu 三种类型。
        • memory: 单位为 Bytes,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"memory": {"request": 100 * 1024, "limit": 200 * 1024}}。
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"memory": 100 * 1024} 等价于 resources={"memory": {"request": 100 * 1024, "limit": 100 * 1024}}。
        • cpu: 单位为 CPU 核心数,支持 int 和 float 类型。
          • 支持以字典的方式设置 request 和 limit,比如 resources={"cpu": {"request": 1, "limit": 2}}。
          • 若仅设置单个数字,则 SDK 会自动将 request 和 limit 设置为相同的数值,比如 resources={"cpu": 1.5} 等价于 resources={"cpu": {"request": 1.5, "limit": 1.5}}。
        • nvidia.com/gpu: 单位为 GPU显卡数,支持 int 类型。
          • nvidia.com/gpu 不支持设置 request 和 limit,仅支持单个数字。
      • 需要注意: resource 参数目前仅在 Server 实例中生效。Cloud 实例,通过在提交评测任务时,选择对应的资源池达到相同的作用。Standalone 实例完全不支持该特性。
    • replicas: (int, optional)
      • predict 运行的副本数。
      • predict 相当于定义了一个 Step, 在该 Step 中有若干等价的 Task,每个 Task 在 Cloud/Server 实例上运行实体是 Pod,在 Standalone 实例上运行实体是 Thread。
      • 当指定多个副本时,这些副本是等价的,它们会共同消费选定的数据集,实现分布式数据集消费的目的,可以理解为某个数据集中的某行数据,只会被一个 predict 副本读取。
      • 默认值为1。
    • batch_size: (int, optional)
      • 批量将数据集中的数据传递进函数中。
      • 默认值为1。
    • fail_on_error: (bool, optional)
      • 当被修饰的函数抛出异常时,是否中断所有模型评测。如果预期某些“异常”数据会导致评测失败,但不想中断整体评测,可以设置 fail_on_error=False
      • 默认为 True
    • auto_log: (bool, optional)
      • 是否自动记录函数返回值和数据集输入 features 到 results 表中。
      • 默认为 True
    • log_mode: (str, optional)
      • 当 auto_log=True 时,可以通过设置 log_mode 参数,定义以 plain 或 pickle 方式记录函数返回值。
      • 默认为 pickle 方式。
    • log_dataset_features: (List[str], optional)
      • auto_log=True 时,可以通过该参数,选择性的记录数据集中的某些 features 。
      • 默认会记录所有的 features 。
    • needs: (List[Callable], optional)
      • 定义该任务运行的前置条件,可以用 needs 语法实现 DAG。
      • needs 接受被 @evaluation.predict, @evaluation.evaluate 和 @handler 修饰的函数。
      • 默认为空,不依赖任何其他任务。

    传入参数

    被修饰的函数,需要定义一些输入参数,用来接受数据集内容等,包含如下模式:

    • 单个 data 参数:

      • data 为 一个类 dict 类型,能够读取到数据集的 features 内容。
      • 当 batch_size=1 或不设置 batch_size 时,可以通过 data['label'] 或 data.label 方式读取 label feature。
      • 当设置 batch_size > 1 时,data 为一个 list。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data):
      print(data['label'])
      print(data.label)
    • data + external 参数方式:

      • data 为数据集的features。
      • external 为一个 dict 类型,包含 index, index_with_dataset, dataset_info, context 和 dataset_uri 这些内建属性,可以用来做更细粒度的处理。
        • index: 数据集对应行的 index 信息。
        • index_with_dataset: 适用于多个数据集输入的时候做 index 区分。
        • dataset_info: starwhale.core.dataset.tabular.TabularDatasetInfo 对象。
        • context: starwhale.Context 对象。
        • dataset_uri: starwhale.base.uri.resource.Resource 对象。
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, external):
      print(data['label'])
      print(data.label)
      print(external["context"])
      print(external["dataset_uri"])
    • data + **kw 方式:

      • data 为数据集的features。
      • kw 可以读取到 external
      from starwhale import evaluation

      @evaluation.predict
      def predict(data, **kw):
      print(kw["external"]["context"])
      print(kw["external"]["dataset_uri"])
    • *args + **kwargs 方式:

      • args的第一个元素为 data
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args, **kw):
      print(args[0].label)
      print(args[0]["label"])
      print(kw["external"]["context"])
    • **kwargs 方式:

      from starwhale import evaluation

      @evaluation.predict
      def predict(**kw):
      print(kw["data"].label)
      print(kw["data"]["label"])
      print(kw["external"]["context"])
    • *args 方式:

      • 此方式无法读取到 external 信息。
      from starwhale import evaluation

      @evaluation.predict
      def predict(*args):
      print(args[0].label)
      print(args[0]["label"])

    使用示例

    from starwhale import evaluation

    @evaluation.predict
    def predict_image(data):
    ...

    @evaluation.predict(
    dataset="mnist/version/latest",
    batch_size=32,
    replicas=4,
    needs=[predict_image],
    )
    def predict_batch_images(batch_data):
    ...

    @evaluation.predict(
    resources={"nvidia.com/gpu": 1,
    "cpu": {"request": 1, "limit": 2},
    "memory": 200 * 1024}, # 200MB
    log_mode="plain",
    )
    def predict_with_resources(data):
    ...

    @evaluation.predict(
    replicas=1,
    log_mode="plain",
    log_dataset_features=["txt", "img", "label"],
    )
    def predict_with_selected_features(data):
    ...

    @evaluation.evaluate

    @evaluation.evaluate 是一个修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段,能够完成如下核心功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 自动读取 predict 阶段记录到 results 表的数据,并以迭代器的方式传入函数中。
    • evaluate 阶段只会运行一个副本,无法像 predict 阶段一样定义 replicas 参数。

    参数

    • resources: (dict, optional)
      • 与 @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • 与 @evaluation.predict 中的 needs 参数定义保持一致。
      • 绝大多数场景中,会依赖一个 @evaluation.predict 修饰的函数。
    • use_predict_auto_log: (bool, optional)
      • 默认为 True,传入一个能够遍历 predict 结果的迭代器到函数中。

    输入参数

    • 当 use_predict_auto_log=True(默认)时,传入一个能够遍历 predict 结果的迭代器到函数中。
      • 迭代出来的对象为一个字典,包含 output 和 input 两个key。
        • output 为 predict 阶段函数返回的元素。
        • input 为推理时对应使用的数据集的 features ,为一个字典类型。
    • use_predict_auto_log=False 时,不传入任何参数到函数中。

    使用示例

    from starwhale import evaluation

    @evaluation.evaluate(needs=[predict_image])
    def evaluate_results(predict_result_iter):
    ...

    @evaluation.evaluate(
    use_predict_auto_log=False,
    needs=[predict_image],
    )
    def evaluate_results():
    ...

    class Evaluation

    starwhale.Evaluation 实现 Starwhale Model Evaluation 的抽象,能对Standalone/Server/Cloud实例上的Model Evaluation进行log和scan等操作,用来记录和检索指标。

    __init__

    __init__ 函数用来初始化一个 Evaluation 对象。

    class Evaluation
    def __init__(self, id: str, project: Project | str) -> None:

    参数

    • id: (str, required)
      • Evaluation 的 UUID,此ID由 Starwhale 系统自动生成。
    • project: (Project|str, required)
      • Project 对象或 Project URI 字符串。

    使用示例

    from starwhale import Evaluation

    standalone_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="self")
    server_e = Evaluation("fcd1206bf1694fce8053724861c7874c", project="cloud://server/project/starwhale:starwhale")
    cloud_e = Evaluation("2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/project/starwhale:llm-leaderboard")

    from_context

    from_context 是一个 classmethod 方法,获得当前 Context 下的 Evaluation 对象。from_context 在任务运行环境下才能生效,非任务运行环境调用该方法,会抛出 RuntimeError 异常,提示 Starwhale Context 没有被合理设置。

    @classmethod
    def from_context(cls) -> Evaluation:

    使用示例

    from starwhale import Evaluation

    with Evaluation.from_context() as e:
    e.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})

    log

    log 是一个 method 方法,记录某些评测指标到特定表中,之后可以通过 Server/Cloud 实例的 Web 页面或 scan 方法中查看相关的表。

    def log(
    self, category: str, id: t.Union[str, int], metrics: t.Dict[str, t.Any]
    ) -> None:

    参数

    • category: (str, required)
      • 记录的类别,该值会被作为 Starwhale Datastore 的表名的后缀。
      • 一个 category 会对应一张 Starwhale Datastore 的表,这些表会以评测任务ID作为隔离区分,相互不影响。
    • id: (str|int, required)
      • 记录的ID,表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • 字典类型,key-value 方式记录指标。
      • key 为 str 类型。
      • value 既支持 int, float, str, bytes, bool 等常量类型,也支持 tuple, list, dict 等复合类型。同时也支持Artifacts类型 Starwhale.Image, Starwhale.Video, Starwhale.Audio, Starwhale.Text, Starwhale.Binary 的记录。
      • 当 value 中包含 dict 类型时,Starwhale SDK会自动展平字典,便于更好的进行可视化展示和指标对比。
        • 比如 metrics 为 {"test": {"loss": 0.99, "prob": [0.98,0.99]}, "image": [Image, Image]} , 存入后会变成 {"test/loss": 0.99, "test/prob": [0.98, 0.99], "image/0": Image, "image/1": Image} 结构。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation.from_context()

    evaluation_store.log("label/1", 1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log("ppl", "1", {"a": "test", "b": 1})

    scan

    scan 是一个 method 方法,返回一个迭代器,用来读取某些模型评测表中的数据。

    def scan(
    self,
    category: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    参数

    • category: (str, required)
      • 与 log 函数中的 category 参数含义一致。
    • start: (Any, optional)
      • 起始 Key,若不指定,则从表的第一条数据开始。
    • end: (Any, optional)
      • 结束 Key,若不指定,则一直遍历到表的结尾。
    • keep_none: (bool, optional)
      • 若某列的值为 None,是否返回该列,默认不返回。
    • end_inclusive: (bool, optional)
      • 是否包含 end 对应的行,默认不包含。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    results = [data for data in evaluation_store.scan("label/0")]

    flush

    flush 是一个 method 方法,能够将 log 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush(self, category: str, artifacts_flush: bool = True) -> None

    参数

    • category: (str, required)
      • log 函数中的 category 参数含义一致。
    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True

    log_result

    log_result 是一个 method 方法,记录评测指标到 results 表中,等价于 log 方法指定 category 参数为 resultsresults 表一般用来存储推理结果,@starwhale.predict 默认情况下会将修饰函数的返回值存储在 results 表中,也可以用 log_results 手动存储。

    def log_result(self, id: t.Union[str, int], metrics: t.Dict[str, t.Any]) -> None:

    参数

    • id: (str|int, required)
      • 记录的ID,results 表内唯一。
      • 同一张表,只能采用 str 或 int 的一种类型作为 ID 类型。
    • metrics: (dict, required)
      • log 函数中 metrics 参数定义一致。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")
    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})

    scan_results

    scan_results 是一个 method 方法,返回一个迭代器,用来读取 results 表中的数据。

    def scan_results(
    self,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator:

    参数

    • start: (Any, optional)
      • 起始 Key,若不指定,则从表的第一条数据开始。
      • scan 函数中 start 参数定义一致。
    • end: (Any, optional)
      • 结束 Key,若不指定,则一直遍历到表的结尾。
      • scan 函数中 end 参数定义一致。
    • keep_none: (bool, optional)
      • 若某列的值为 None,是否返回该列,默认不返回。
      • scan 函数中 keep_none 参数定义一致。
    • end_inclusive: (bool, optional)
      • 是否包含 end 对应的行,默认不包含。
      • scan 函数中 end_inclusive 参数定义一致。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="self")

    evaluation_store.log_result(1, {"loss": 0.99, "accuracy": 0.98})
    evaluation_store.log_result(2, {"loss": 0.98, "accuracy": 0.99})
    results = [data for data in evaluation_store.scan_results()]

    flush_results

    flush_results 是一个 method 方法,能够将 log_result 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_results 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_results(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • flush 方法中 artifacts_flush 参数定义一致。

    log_summary

    log_summary 是一个 method 方法,记录某些指标到 summary 表中,Server/Cloud 实例评测页面显示的就是 summary 表的数据。 每次调用,Starwhale 都会自动以此次评测的唯一ID作为表的行ID进行更新,可以在一次评测过程中多次调用该函数,用来更新不同的列。

    每个项目中有一张 summary 表,所有该项目下的评测任务都会将 summary 信息写入该表中,便于进行不同模型评测的结果对比。

    def log_summary(self, *args: t.Any, **kw: t.Any) -> None:

    参数与 log 函数一致,也会对字典类型自动展平。

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")

    evaluation_store.log_summary(loss=0.99)
    evaluation_store.log_summary(loss=0.99, accuracy=0.99)
    evaluation_store.log_summary({"loss": 0.99, "accuracy": 0.99})

    get_summary

    get_summary 是一个 method 方法,用来返回 log_summary 记录的信息。

    def get_summary(self) -> t.Dict:

    flush_summary

    flush_summary 是一个 method 方法,能够将 log_summary 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_summary 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_summary(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • flush 方法中 artifacts_flush 参数定义一致。

    flush_all

    flush_all 是一个 method 方法,能够将 log, log_results, log_summary 方法记录的指标立即刷新到 datastore 和 oss 存储中。若不调用 flush_all 方法,Evaluation 最后关闭的时候,也会自动刷新数据到存储中。

    def flush_all(self, artifacts_flush: bool = True) -> None:

    参数

    • artifacts_flush: (bool, optional)
      • 是否转储制品数据到blob文件,并上传到相关存储中。默认为 True
      • flush 方法中 artifacts_flush 参数定义一致。

    get_tables

    get_tables 是一个 method 方法,返回模型评测中产生的所有表的名称,需要注意的是,该函数并不返回 summary 表名称。

    def get_tables(self) -> t.List[str]:

    close

    close 是一个 method 方法,用来关闭 Evaluation 对象。close 调用时会将,会自动刷新数据到存储中。同时 Evaluation 也实现了 __enter____exit__ 方法,可以用 with 语法简化 close 的手工调用。

    def close(self) -> None:

    使用示例

    from starwhale import Evaluation

    evaluation_store = Evaluation(id="2ddab20df9e9430dbd73853d773a9ff6", project="https://cloud.starwhale.cn/projects/349")
    evaluation_store.log_summary(loss=0.99)
    evaluation_store.close()

    # auto close when the with-context exits.
    with Evaluation.from_context() as e:
    e.log_summary(loss=0.99)

    @handler

    @handler 是一个修饰器,具备如下功能:

    • 在 Server 实例上,申请运行所需要的资源。
    • 可以控制副本数。
    • 多个 Handlers 可以通过依赖关系,生成DAG,便于控制执行流程。
    • 可以对外暴露端口,以类似 Web Handler 方式运行。

    @fine_tune, @evaluation.predict 和 @evaluation.evaluate 可以认为是 @handler 在某些特定领域的应用,@handler 是这些修饰器的底层实现。@handler 更为基础和灵活。

    @classmethod
    def handler(
    cls,
    resources: t.Optional[t.Dict[str, t.Any]] = None,
    replicas: int = 1,
    needs: t.Optional[t.List[t.Callable]] = None,
    name: str = "",
    expose: int = 0,
    require_dataset: bool = False,
    ) -> t.Callable:

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。
    • replicas: (int, optional)
      • @evaluation.predict 中的 replicas 参数定义保持一致。
    • name: (str, optional)
      • 显示 handler 时候用的名字。
      • 若不指定,则用修饰函数的名字。
    • expose: (int, optional)
      • 对外暴露的端口,当运行一个 Web Handler的时候,需要声明暴露的端口。
      • 默认为0,表示不暴露任何端口。
      • 目前只能暴露一个端口。
    • require_dataset: (bool, optional)
      • 定义此 Handler 运行时,是否需要数据集。
      • 如果 required_dataset=True,在 Server/Cloud 实例的 Web 界面创建评测任务的时候,需要让用户强制输入数据集;如果 required_dataset=False,则 Web 界面中不需要用户指定数据集。
      • 默认为 False

    使用示例

    from starwhale import handler
    import gradio

    @handler(resources={"cpu": 1, "nvidia.com/gpu": 1}, replicas=3)
    def my_handler():
    ...

    @handler(needs=[my_handler])
    def my_another_handler():
    ...

    @handler(expose=7860)
    def chatbot():
    with gradio.Blocks() as server:
    ...
    server.launch(server_name="0.0.0.0", server_port=7860)

    @fine_tune

    fine_tune 是一个修饰器,定义模型训练的微调(fine-tune)过程。

    一些限制和使用建议:

    • fine_tune 只有一个副本。
    • fine_tune 需要有数据集输入。
    • 一般在 fine_tune 开始时,通过 Context.get_runtime_context() 获取数据集。
    • 一般在 fine_tune 结束时,通过 starwhale.model.build 生成微调后的 Starwhale 模型包,该模型包会被自动复制到评测对应的项目中。

    参数

    • resources: (dict, optional)
      • @evaluation.predict 中的 resources 参数定义保持一致。
    • needs: (List[Callable], optional)
      • @evaluation.predict 中的 needs 参数定义保持一致。

    使用示例

    from starwhale import model as starwhale_model
    from starwhale import fine_tune, Context

    @fine_tune(resources={"nvidia.com/gpu": 1})
    def llama_fine_tuning():
    ctx = Context.get_runtime_context()

    if len(ctx.dataset_uris) == 2:
    # TODO: use more graceful way to get train and eval dataset
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = dataset(ctx.dataset_uris[1], readonly=True, create="forbid")
    elif len(ctx.dataset_uris) == 1:
    train_dataset = dataset(ctx.dataset_uris[0], readonly=True, create="forbid")
    eval_dataset = None
    else:
    raise ValueError("Only support 1 or 2 datasets(train and eval dataset) for now")

    #user training code
    train_llama(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    )

    model_name = get_model_name()
    starwhale_model.build(name=f"llama-{model_name}-qlora-ft")

    @multi_classification

    @multi_classification 修饰器使用sklearn lib对多分类问题进行结果分析,输出confusion matrix, roc, auc等值,并且会写入到 starwhale DataStore 相关表中。 使用的时候需要对所修饰的函数返回值有一定要求,返回(label, result, probability_matrix)(label, result)

    def multi_classification(
    confusion_matrix_normalize: str = "all",
    show_hamming_loss: bool = True,
    show_cohen_kappa_score: bool = True,
    show_roc_auc: bool = True,
    all_labels: t.Optional[t.List[t.Any]] = None,
    ) -> t.Any:

    参数

    • confusion_matrix_normalize: (str, optional)
      • 接收三种参数:
        • true: rows
        • pred: columns
        • all: rows+columns
    • show_hamming_loss: (bool, optional)
      • 是否计算hamming loss。
      • 默认为 True
    • show_cohen_kappa_score: (bool, optional)
      • 是否计算 cohen kappa score。
      • 默认为 True
    • show_roc_auc: (bool, optional)
      • 是否计算roc/auc, 计算的时候,需要函数返回(label,result, probability_matrix) 三元组,否则只需返回(label, result) 两元组即可。
      • 默认为 True
    • all_labels: (List, optional)
      • 定义所有的Labels。

    使用示例


    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=True,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result, probability_matrix = [], [], []
    return label, result, probability_matrix

    @multi_classification(
    confusion_matrix_normalize="all",
    show_hamming_loss=True,
    show_cohen_kappa_score=True,
    show_roc_auc=False,
    all_labels=[i for i in range(0, 10)],
    )
    def evaluate(ppl_result) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]:
    label, result = [], []
    return label, result

    PipelineHandler

    PipelineHandler 是一个类,提供默认的模型评测过程定义,需要用户实现 predictevaluate 函数。

    PipelineHandler 等价于 @evaluation.predict + @evaluation.evaluate,展示使用方式不一样,背后的模型评测过程一致。

    用户需要实现如下函数:

    • predict: 定义推理过程,等价于 @evaluation.predict 修饰的函数。
    • evaluate: 定义评测过程,等价于 @evaluation.evaluate 修饰的函数。
    from typing import Any, Iterator
    from abc import ABCMeta, abstractmethod

    class PipelineHandler(metaclass=ABCMeta):
    def __init__(
    self,
    predict_batch_size: int = 1,
    ignore_error: bool = False,
    predict_auto_log: bool = True,
    predict_log_mode: str = PredictLogMode.PICKLE.value,
    predict_log_dataset_features: t.Optional[t.List[str]] = None,
    **kwargs: t.Any,
    ) -> None:
    self.context = Context.get_runtime_context()
    ...

    def predict(self, data: Any, **kw: Any) -> Any:
    raise NotImplementedError

    def evaluate(self, ppl_result: Iterator) -> Any:
    raise NotImplementedError

    参数

    • predict_batch_size: (int, optional)
      • 等价于 @evaluation.predict 中的 batch_size 参数。
      • 默认值为1。
    • ignore_error: (bool, optional)
      • 等价于 @evaluation.predict 中的 fail_on_error 参数。
      • 默认值为 False
    • predict_auto_log: (bool, optional)
      • 等价于 @evaluation.predict 中的 auto_log 参数。
      • 默认值为 True
    • predict_log_mode: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_mode 参数。
      • 默认值为 pickle
    • predict_log_dataset_features: (bool, optional)
      • 等价于 @evaluation.predict 中的 log_dataset_features 参数。
      • 默认值为空,对记录所有 features。

    PipelineHandler.run 修饰符

    PipelineHandler.run 修饰符可以对 predictevaluate 方法进行资源描述,支持 replicasresources 的定义:

    • PipelineHandler.run 只能修饰继承自 PipelineHandler 子类中的 predictevaluate方法。
    • predict 方法可以设置 replicas 参数。evaluate 方法的 replicas 值永远为1。
    • resources 参数与 @evaluation.predict 和 @evaluation.evaluate 中的 resources 参数定义和使用方法保持一致。
    • PipelineHandler.run 修饰器是可选的。
    • PipelineHandler.run 仅在 Server 和 Cloud 实例中生效,Standalone 实例不支持资源定义。
    @classmethod
    def run(
    cls, resources: t.Optional[t.Dict[str, t.Any]] = None, replicas: int = 1
    ) -> t.Callable:

    使用示例

    import typing as t

    import torch
    from starwhale import PipelineHandler

    class Example(PipelineHandler):
    def __init__(self) -> None:
    super().__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self._load_model(self.device)

    @PipelineHandler.run(replicas=4, resources={"memory": 1 * 1024 * 1024 *1024, "nvidia.com/gpu": 1}) # 1G Memory, 1 GPU
    def predict(self, data: t.Dict):
    data_tensor = self._pre(data.img)
    output = self.model(data_tensor)
    return self._post(output)

    @PipelineHandler.run(resources={"memory": 1 * 1024 * 1024 *1024}) # 1G Memory
    def evaluate(self, ppl_result):
    result, label, pr = [], [], []
    for _data in ppl_result:
    label.append(_data["input"]["label"])
    result.extend(_data["output"][0])
    pr.extend(_data["output"][1])
    return label, result, pr

    def _pre(self, input: Image) -> torch.Tensor:
    ...

    def _post(self, input):
    ...

    def _load_model(self, device):
    ...

    Context

    执行模型评测过程中传入的上下文信息,包括Project、Task ID等。Context 的内容是自动注入的,可以通过如下方式使用:

    • 继承 PipelineHandler 类内使用 self.context 对象。
    • 通过 Context.get_runtime_context() 获取。

    需要注意,只有在模型评测过程中,才能使用Context,否则程序会抛出异常。

    目前Context可以获得如下值:

    • project: str
      • Project 名字。
    • version: str
      • 模型评测的唯一ID。
    • step: str
      • Step 名字。
    • total: int
      • Step 下所有 Task 的数量。
    • index: int
      • Task 索引标号,下标从0开始。
    • dataset_uris: List[str]
      • Starwhale 数据集的URI 列表。

    使用示例


    from starwhale import Context, PipelineHandler

    def func():
    ctx = Context.get_runtime_context()
    print(ctx.project)
    print(ctx.version)
    print(ctx.step)
    ...

    class Example(PipelineHandler):

    def predict(self, data: t.Dict):
    print(self.context.project)
    print(self.context.version)
    print(self.context.step)

    @starwhale.api.service.api

    @starwhale.api.service.api 是一个修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,当用户使用 swcli model serve 命令启动 Web Service 接收外部请求,并将推理结果返回给用户,实现在线评测。

    使用示例

    import gradio
    from starwhale.api.service import api

    def predict_image(img):
    ...

    @api(gradio.File(), gradio.Label())
    def predict_view(file: t.Any) -> t.Any:
    with open(file.name, "rb") as f:
    data = Image(f.read(), shape=(28, 28, 1))
    _, prob = predict_image({"img": data})
    return {i: p for i, p in enumerate(prob)}

    starwhale.api.service.Service

    如果希望自定义 web service 的实现, 可以继承 Service 并重写 serve 函数即可。

    class CustomService(Service):
    def serve(self, addr: str, port: int, handler_list: t.List[str] = None) -> None:
    ...

    svc = CustomService()

    @svc.api(...)
    def handler(data):
    ...

    说明:

    • 使用 PipelineHandler.add_api 函数添加的 handler 和 api 以及实例化的 Service.api decorator 添加的 handler 可以同时生效
    • 如果使用自定义的 Service, 需要在 model 中实例化自定义的 Service 类

    自定义 Request 和 Response

    Request 和 Response 分别是用于接收用户请求和返回给用户结果的处理类, 可以简单的理解成是 handler 的前处理和后处理逻辑

    Starwhale 将支持 Dataset 内置类型的 Request 实现以及 Json Response 的实现, 同时用户可以自定义处理逻辑来使用, 自定义的示例如下:

    import typing as t

    from starwhale.api.service import (
    Request,
    Service,
    Response,
    )

    class CustomInput(Request):
    def load(self, req: t.Any) -> t.Any:
    return req


    class CustomOutput(Response):
    def __init__(self, prefix: str) -> None:
    self.prefix = prefix

    def dump(self, req: str) -> bytes:
    return f"{self.prefix} {req}".encode("utf-8")

    svc = Service()

    @svc.api(request=CustomInput(), response=CustomOutput("hello"))
    def foo(data: t.Any) -> t.Any:
    ...
    - + \ No newline at end of file diff --git a/zh/reference/sdk/job/index.html b/zh/reference/sdk/job/index.html index d8cb7e538..5198115b7 100644 --- a/zh/reference/sdk/job/index.html +++ b/zh/reference/sdk/job/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 任务 SDK

    job

    通过Job URI参数获取 starwhale.Job 对象,可以获得 Standalone/Server/Cloud 实例上的任务。

    @classmethod
    def job(
    cls,
    uri: str,
    ) -> Job:

    参数

    • uri: (str, required)
      • Job URI格式的字符串。

    使用示例

    from starwhale import job
    # get job object of uri=https://server/job/1
    j1 = job("https://server/job/1")
    # get job from standalone instance
    j2 = job("local/project/self/job/xm5wnup")
    j3 = job("xm5wnup")

    class starwhale.Job

    starwhale.Job 实现对 Starwhale 任务的抽象,能够对 Standalone/Server/Cloud 实例上的任务进行一些信息获取类的操作。

    list

    list 是一个 classmethod 方法,能够列出某个项目下的任务。

    @classmethod
    def list(
    cls,
    project: str = "",
    page_index: int = DEFAULT_PAGE_IDX,
    page_size: int = DEFAULT_PAGE_SIZE,
    ) -> t.Tuple[t.List[Job], t.Dict]:

    参数

    • project: (str, optional)
      • Project URI,Standalone/Server/Cloud 实例上的项目都可以。
      • 若不指定 project 参数,则使用 swcli project select 命令选定的项目。
    • page_index: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定页面序号。
        • 默认值为 1。
        • 页面起始序号为 1。
      • Standalone 实例不支持翻页操作,设置该参数无效。
    • page_size: (int, optional)
      • 当获取 Server/Cloud 实例的项目列表时,支持翻页操作,该参数可以指定每页返回的任务数量。
        • 默认值为 20。
      • Standalone 实例不支持翻页操作,设置该参数无效。

    使用示例

    from starwhale import Job
    # list jobs of current selected project
    jobs, pagination_info = Job.list()
    # list jobs of starwhale/public project in the cloud.starwhale.cn instance
    jobs, pagination_info = Job.list("https://cloud.starwhale.cn/project/starwhale:public")
    # list jobs of id=1 project in the server instance, page index is 2, page size is 10
    jobs, pagination_info = Job.list("https://server/project/1", page_index=2, page_size=10)

    get

    get 是一个 classmethod 方法,能够获得某个特定任务的信息,返回 Starwhale.Job 对象,与 starwhale.job 函数功能和参数定义上完全一致。

    使用示例

    from starwhale import Job
    # get job object of uri=https://server/job/1
    j1 = Job.get("https://server/job/1")
    # get job from standalone instance
    j2 = Job.get("local/project/self/job/xm5wnup")
    j3 = Job.get("xm5wnup")

    summary

    summary 是一个 property 属性,返回任务运行中写入 summary 表中的数据,字典类型。

    @property
    def summary(self) -> t.Dict[str, t.Any]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.summary)

    tables

    tables 是一个 property 属性,返回任务运行中创建的表名(不包括 summary 表,因为 summary 表是项目级别自动创建的),列表类型。

    @property
    def tables(self) -> t.List[str]:

    使用示例

    from starwhale import job
    j1 = job("https://server/job/1")
    print(j1.tables)

    get_table_rows

    get_table_rows 是一个 method 方法,可以根据表名等参数返回数据表的记录,迭代器类型。

    def get_table_rows(
    self,
    name: str,
    start: t.Any = None,
    end: t.Any = None,
    keep_none: bool = False,
    end_inclusive: bool = False,
    ) -> t.Iterator[t.Dict[str, t.Any]]:

    参数

    • name: (str, required)
      • datastore 表名。通过 tables 属性获得的表名,可以传给 name 参数。
    • start: (Any, optional)
      • 返回记录中,ID的起始值。
      • 默认值为 None,表示从头开始。
    • end: (Any, optional)
      • 返回记录中,ID的结束值。
      • 默认值为 None ,表示一直到表末尾。
      • startend 都为 None,则会以迭代器方式返回整个表的数据。
    • keep_none: (bool, optional)
      • 是否返回值为 None的记录。
      • 默认为 False。
    • end_inclusive: (bool, optional)
      • end 参数设置时,迭代记录的时候,是否包含end记录。
      • 默认为 False。

    使用示例

    from starwhale import job
    j = job("local/project/self/job/xm5wnup")
    table_name = j.tables[0]
    for row in j.get_table_rows(table_name):
    print(row)
    rows = list(j.get_table_rows(table_name, start=0, end=100))
    # return the first record from the results table
    result = list(j.get_table_rows('results', start=0, end=1))[0]

    status

    status 是一个 property 属性,返回当前Job的实时状态,字符串类型,状态包含 CREATED, READY, PAUSED, RUNNING, CANCELLING, CANCELED, SUCCESS, FAILUNKNOWN

    @property
    def status(self) -> str:

    create

    create 是一个 classmethod 方法,能够创建 Standalone 实例或 Server/Cloud 实例上的任务,包括 Model Evaluation, Fine-tuning, Online Serving 和 Developing 等类型的任务。函数返回 Job 类型的对象。

    • create 通过 project 参数决定生成的任务运行在何种实例上,包括 Standalone 和 Server/Cloud 实例。
    • 在 Standalone 实例下,create 创建一个同步执行的任务。
    • 在 Server/Cloud 实例下, create 创建一个异步执行的任务。
    @classmethod
    def create(
    cls,
    project: Project | str,
    model: Resource | str,
    run_handler: str,
    datasets: t.List[str | Resource] | None = None,
    runtime: Resource | str | None = None,
    resource_pool: str = DEFAULT_RESOURCE_POOL,
    ttl: int = 0,
    dev_mode: bool = False,
    dev_mode_password: str = "",
    dataset_head: int = 0,
    overwrite_specs: t.Dict[str, t.Any] | None = None,
    ) -> Job:

    参数

    对所有实例都生效的参数:

    • project: (Project|str, required)
      • Project 对象或 Project URI 字符串。
    • model: (Resource|str, required)
      • Model URI 字符串或 Model 类型的 Resource 对象,表示要运行的 Starwhale 模型包。
    • run_handler: (str, required)
      • Starwhale 模型包中对应的可运行的 handler 名称,比如 mnist 的 evaluate handler: mnist.evaluator:MNISTInference.evaluate
    • datasets: (List[str | Resource], optional)
      • Starwhale 模型包运行所需要的数据集,非必需。

    仅对 Standalone 实例生效的参数:

    • dataset_head: (int, optional)
      • 一般用于调试场景,只使用数据集前 N 条数据来供 Starwhale 模型来消费。

    仅对 Server/Cloud 实例生效的参数:

    • runtime: (Resource | str, optional)
      • Runtime URI 字符串或 Runtime 类型的 Resource 对象,表示要运行任务所需要的 Starwhale 运行时。
      • 当不指定该参数时,会尝试使用 Starwhale 模型包的内建运行时。
      • 创建 Standalone 实例下的任务,使用 Python 脚本所用的 Python 解释器环境作为自己的运行时,不支持通过 runtime 参数指定运行时。若有指定运行时的需要,可以使用 swcli model run 命令。
    • resource_pool: (str, optional)
      • 指定任务运行在哪个资源池中,默认为 default 资源池。
    • ttl: (int, optional)
      • 任务最大存活时间,超时后会被杀掉。
      • 参数单位为秒。
      • 默认情况下,ttl为0,表示没有超时限制,任务会按预期运行。
      • 当ttl小于0时,也表示没有超时限制。
    • dev_mode: (bool, optional)
      • 是否设置为调试模式。开启此模式后,可以通过VSCode Web进入到相关环境中。
      • 默认不进入调试模式。
    • dev_mode_password: (str, optional)
      • 调试模式下VSCode Web的登录密码。
      • 默认为空,此时会用任务的UUID作为密码,可以通过 job.info().job.uuid 获得。
    • overwrite_specs: (Dict[str, Any], optional)
      • 支持设置 handler 的 replicas 和 resources 字段。
      • 若为空,则使用模型包中对应 handler 设置的值。
      • overwrite_specs 的 key 为 handler 的名字,比如 mnist 的 evaluate handler: mnist.evaluator:MNISTInference.evaluate
      • overwrite_specs 的 value 为设置的值,字典格式,支持设置 replicas 和 resources , 比如 {"replicas": 1, "resources": {"memory": "1GiB"}} 。

    使用示例

    • 创建一个 Cloud 实例的任务
    from starwhale import Job
    project = "https://cloud.starwhale.cn/project/starwhale:public"
    job = Job.create(
    project=project,
    model=f"{project}/model/mnist/version/v0",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=[f"{project}/dataset/mnist/version/v0"],
    runtime=f"{project}/runtime/pytorch",
    overwrite_specs={"mnist.evaluator:MNISTInference.evaluate": {"resources": "4GiB"},
    "mnist.evaluator:MNISTInference.predict": {"resources": "8GiB", "replicas": 10}}
    )
    print(job.status)
    • 创建一个 Standalone 实例的任务
    from starwhale import Job
    job = Job.create(
    project="self",
    model="mnist",
    run_handler="mnist.evaluator:MNISTInference.evaluate",
    datasets=["mnist"],
    )
    print(job.status)
    - + \ No newline at end of file diff --git a/zh/reference/sdk/model/index.html b/zh/reference/sdk/model/index.html index 29d65c2e9..b9c595d65 100644 --- a/zh/reference/sdk/model/index.html +++ b/zh/reference/sdk/model/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 模型 SDK

    model.build

    model.build 是一个函数,能够构建 Starwhale 模型,等价于 swcli model build 命令。

    def build(
    modules: t.Optional[t.List[t.Any]] = None,
    workdir: t.Optional[_path_T] = None,
    name: t.Optional[str] = None,
    project_uri: str = "",
    desc: str = "",
    remote_project_uri: t.Optional[str] = None,
    add_all: bool = False,
    tags: t.List[str] | None = None,
    ) -> None:

    参数

    • modules: (List[str|object], optional)
      • 构建时导入的模块,为列表类型,可以指定多个模块。
      • 模块类型包含两种:
        • 字符串类型: Python 可 Import 的路径,比如 "to.path.module", "to.path.module:object" 。
        • Python 对象: model.build 函数会自动解析所对应的模块。
      • 如果不指定,则会搜索当前已经导入的模块。
    • name: (str, optional)
      • Starwhale 模型的名称。
      • 若不指定,则会使用 cwd 目录名作为 Starwhale 模型的名称。
    • workdir: (str, Pathlib.Path, optional)
      • Starwhale 模型打包的根目录,此目录下的文件会被打包。
    • project_uri: (str, optional)
      • Project URI,表示该模型属于哪个项目。
      • 默认为 swcli project select 选择的项目。
    • desc: (str, optional)
      • 描述信息,默认为空。
    • remote_project_uri: (str, optional)
      • 其他实例的项目 URI,构建完 Starwhale 模型后,会被自动复制到远端实例中。
    • add_all: (bool, optional)
      • Starwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使使用该参数,也不影响 .swignore 文件的预期作用。
      • 默认为 False
    • tags: (List[str], optional)
      • 用户自定义标签。
      • 不能指定 latest^v\d+$ 这两个 Starwhale 系统内建标签。

    使用示例

    from starwhale import model

    # class search handlers
    from .user.code.evaluator import ExamplePipelineHandler
    model.build([ExamplePipelineHandler])

    # function search handlers
    from .user.code.evaluator import predict_image
    model.build([predict_image])

    # module handlers, @handler decorates function in this module
    from .user.code import evaluator
    model.build([evaluator])

    # str search handlers
    model.build(["user.code.evaluator:ExamplePipelineHandler"])
    model.build(["user.code1", "user.code2"])

    # no search handlers, use imported modules
    model.build()

    # add user custom tags
    model.build(tags=["t1", "t2"])
    - + \ No newline at end of file diff --git a/zh/reference/sdk/other/index.html b/zh/reference/sdk/other/index.html index 7c5f08616..0d9b9111b 100644 --- a/zh/reference/sdk/other/index.html +++ b/zh/reference/sdk/other/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    其他 SDK

    __version__

    Starwhale Python SDK 和 swcli 版本,是字符串常量。

    >>> from starwhale import __version__
    >>> print(__version__)
    0.5.7

    init_logger

    init_logger 用来设置日志输出级别。默认为0

    • 0: 输出 errors 信息,traceback 呈现最近的1个堆栈。
    • 1: 输出 errors + warnings 信息,traceback 呈现最近的5个堆栈内容。
    • 2: 输出 errors + warnings + info 信息,traceback 呈现最多10个堆栈内容。
    • 3: 输出 errors + warnings + info + debug 信息,traceback 呈现最多100个堆栈内容。
    • >=4: 输出 errors + warnings + info + debug + trace 信息,traceback 呈现最多1000个堆栈内容。
    def init_logger(verbose: int = 0) -> None:

    login

    登录 server/cloud 实例,等价于 swcli instance login 命令。登录 Standalone 实例是无意义的。

    def login(
    instance: str,
    alias: str = "",
    username: str = "",
    password: str = "",
    token: str = "",
    ) -> None:

    参数

    • instance: (str, required)
      • server/cloud 实例的 http url。
    • alias: (str, optional)
      • 实例的别名,可以简化 Starwhale URI 中 instance部分。
      • 若不指定,则使用实例的 http url 中 hostname 部分。
    • username: (str, optional)
    • password: (str, optional)
    • token: (str, optional)
      • username + password 与 token 只能选择一种方式登录实例。

    使用示例

    from starwhale import login

    # login to Starwhale Cloud instance by token
    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")

    # login to Starwhale Server instance by username and password
    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")

    logout

    登出 server/cloud 实例, 等价于 swcli instance logout 命令。登出 Standalone 实例是无意义的。

    def logout(instance: str) -> None:

    使用示例

    from starwhale import login, logout

    login(instance="https://cloud.starwhale.cn", alias="cloud-cn", token="xxx")
    # logout by the alias
    logout("cloud-cn")

    login(instance="http://controller.starwhale.svc", alias="dev", username="starwhale", password="abcd1234")
    # logout by the instance http url
    logout("http://controller.starwhale.svc")
    - + \ No newline at end of file diff --git a/zh/reference/sdk/overview/index.html b/zh/reference/sdk/overview/index.html index 095732c5f..c871d10c4 100644 --- a/zh/reference/sdk/overview/index.html +++ b/zh/reference/sdk/overview/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Python SDK 概览

    Starwhale 提供一系列的 Python SDK,帮助用户管理数据集、模型和评测等调用,使用 Starwhale Python SDK 能让您更好的完成 ML/DL 开发任务。

    • class PipelineHandler: 提供默认的模型评测过程定义,需要用户实现 predictevaluate 函数。
    • class Context: 执行模型评测过程中传入的上下文信息,包括 Project、Task ID 等。
    • class Dataset: Starwhale 数据集类。
    • class starwhale.api.service.Service: 在线评测的基础类。
    • class Job: 提供 Job 相关的操作。
    • class Evaluation: 提供 Evaluation 的 log 和 scan 相关的操作。

    函数

    • @multi_classification: 修饰器,适用于多分类问题,用来简化 evaluate 结果的进一步计算和结果存储,能更好的呈现评测结果。
    • @handler: 修饰器,定义一个带有资源属性(mem/cpu/gpu)的运行实体,可以控制副本数。多个Handlers可以通过依赖关系,生成DAG,便于控制执行流程。
    • @evaluation.predict: 修饰器,定义模型评测中的推理过程,类似 MapReduce 中的 map 阶段。
    • @evaluation.evaluate: 修饰器,定义模型评测中的评测过程,类似 MapReduce 中的 reduce 阶段。
    • model.build: 进行 Starwhale 模型构建。
    • @fine_tune: 修饰器,定义模型训练的微调(fine-tune)过程。
    • init_logger: 设置日志输出级别,实现五种级别日志输出。
    • dataset: 获取 starwhale.Dataset 对象,包括创建新的数据集和加载已经存在的数据集两种方式。
    • @starwhale.api.service.api: 修饰器,提供基于 Gradio 的简易 Web Handler 输入定义,实现在线评测。
    • login: 登录 server/cloud 实例。
    • logout: 登出 server/cloud 实例。
    • job: 根据Job URI获得 starwhale.Job 对象。
    • @PipelineHandler.run: 修饰器,定义 PipelineHandler 子类中 predict 和 evaluate 方法的资源。

    数据类型

    • COCOObjectAnnotation: 提供COCO类型的定义。
    • BoundingBox: 边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。
    • ClassLabel: 描述label的数量和类型。
    • Image: 图片类型。
    • GrayscaleImage: 灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。
    • Audio: 音频类型。
    • Video: 视频类型。
    • Text: 文本类型,默认为 utf-8 格式,用来存储大文本。
    • Binary: 二进制类型,用 bytes 存储,用来存储比较大的二进制内容。
    • Line: 直线类型。
    • Point: 点类型。
    • Polygon: 多边形类型。
    • Link: Link类型,用来制作 remote-link 类型的数据。
    • MIMEType: 描述 Starwhale 支持的多媒体类型,用在 ImageVideo 等类型的 mime_type 属性上,能更好的进行 Dataset Viewer。

    其他

    • __version__: Starwhale Python SDK 和 swcli 版本,是字符串常量。

    进一步阅读建议

    - + \ No newline at end of file diff --git a/zh/reference/sdk/type/index.html b/zh/reference/sdk/type/index.html index 9df133dc2..9a43a2579 100644 --- a/zh/reference/sdk/type/index.html +++ b/zh/reference/sdk/type/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 数据类型 SDK

    COCOObjectAnnotation

    提供COCO类型的定义。

    COCOObjectAnnotation(
    id: int,
    image_id: int,
    category_id: int,
    segmentation: Union[t.List, t.Dict],
    area: Union[float, int],
    bbox: Union[BoundingBox, t.List[float]],
    iscrowd: int,
    )
    参数说明
    idobject id,一般为全局object的递增id
    image_idimage id,一般为图片id
    category_idcategory id,一般为目标检测中类别的id
    segmentation物体轮廓表示,Polygon(多边形的点)或RLE格式
    areaobject面积
    bbox表示bounding box,可以为BoundingBox类型或float的列表
    iscrowd0表示是一个单独的object,1表示两个没有分开的object

    使用示例

    def _make_coco_annotations(
    self, mask_fpath: Path, image_id: int
    ) -> t.List[COCOObjectAnnotation]:
    mask_img = PILImage.open(str(mask_fpath))

    mask = np.array(mask_img)
    object_ids = np.unique(mask)[1:]
    binary_mask = mask == object_ids[:, None, None]
    # TODO: tune permute without pytorch
    binary_mask_tensor = torch.as_tensor(binary_mask, dtype=torch.uint8)
    binary_mask_tensor = (
    binary_mask_tensor.permute(0, 2, 1).contiguous().permute(0, 2, 1)
    )

    coco_annotations = []
    for i in range(0, len(object_ids)):
    _pos = np.where(binary_mask[i])
    _xmin, _ymin = float(np.min(_pos[1])), float(np.min(_pos[0]))
    _xmax, _ymax = float(np.max(_pos[1])), float(np.max(_pos[0]))
    _bbox = BoundingBox(
    x=_xmin, y=_ymin, width=_xmax - _xmin, height=_ymax - _ymin
    )

    rle: t.Dict = coco_mask.encode(binary_mask_tensor[i].numpy()) # type: ignore
    rle["counts"] = rle["counts"].decode("utf-8")

    coco_annotations.append(
    COCOObjectAnnotation(
    id=self.object_id,
    image_id=image_id,
    category_id=1, # PennFudan Dataset only has one class-PASPersonStanding
    segmentation=rle,
    area=_bbox.width * _bbox.height,
    bbox=_bbox,
    iscrowd=0, # suppose all instances are not crowd
    )
    )
    self.object_id += 1

    return coco_annotations

    GrayscaleImage

    提供灰度图类型,比如MNIST中数字手写体图片,是 Image 类型的一个特例。

    GrayscaleImage(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width和Height,channel默认为1
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    for i in range(0, min(data_number, label_number)):
    _data = data_file.read(image_size)
    _label = struct.unpack(">B", label_file.read(1))[0]
    yield GrayscaleImage(
    _data,
    display_name=f"{i}",
    shape=(height, width, 1),
    ), {"label": _label}

    GrayscaleImage函数

    GrayscaleImage.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    GrayscaleImage.carry_raw_data

    carry_raw_data() -> GrayscaleImage

    GrayscaleImage.astype

    astype() -> Dict[str, t.Any]

    BoundingBox

    提供边界框类型,目前为 LTWH 格式,即 left_x, top_y, width 和 height。

    BoundingBox(
    x: float,
    y: float,
    width: float,
    height: float
    )
    参数说明
    xleft_x的坐标
    ytop_y的坐标
    width图片的宽度
    height图片的高度

    ClassLabel

    描述label的数量和类型。

    ClassLabel(
    names: List[Union[int, float, str]]
    )

    Image

    图片类型。

    Image(
    fp: _TArtifactFP = "",
    display_name: str = "",
    shape: Optional[_TShape] = None,
    mime_type: Optional[MIMEType] = None,
    as_mask: bool = False,
    mask_uri: str = "",
    )
    参数说明
    fp图片的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    shape图片的Width、Height和channel
    mime_typeMIMEType支持的类型
    as_mask是否作为Mask图片
    mask_uriMask原图的URI

    使用示例

    import io
    import typing as t
    import pickle
    from PIL import Image as PILImage
    from starwhale import Image, MIMEType

    def _iter_item(paths: t.List[Path]) -> t.Generator[t.Tuple[t.Any, t.Dict], None, None]:
    for path in paths:
    with path.open("rb") as f:
    content = pickle.load(f, encoding="bytes")
    for data, label, filename in zip(
    content[b"data"], content[b"labels"], content[b"filenames"]
    ):
    annotations = {
    "label": label,
    "label_display_name": dataset_meta["label_names"][label],
    }

    image_array = data.reshape(3, 32, 32).transpose(1, 2, 0)
    image_bytes = io.BytesIO()
    PILImage.fromarray(image_array).save(image_bytes, format="PNG")

    yield Image(
    fp=image_bytes.getvalue(),
    display_name=filename.decode(),
    shape=image_array.shape,
    mime_type=MIMEType.PNG,
    ), annotations

    Image函数

    Image.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Image.carry_raw_data

    carry_raw_data() -> Image

    Image.astype

    astype() -> Dict[str, t.Any]

    Video

    视频类型。

    Video(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp视频的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from pathlib import Path

    from starwhale import Video, MIMEType

    root_dir = Path(__file__).parent.parent
    dataset_dir = root_dir / "data" / "UCF-101"
    test_ds_path = [root_dir / "data" / "test_list.txt"]

    def iter_ucf_item() -> t.Generator:
    for path in test_ds_path:
    with path.open() as f:
    for line in f.readlines():
    _, label, video_sub_path = line.split()

    data_path = dataset_dir / video_sub_path
    data = Video(
    data_path,
    display_name=video_sub_path,
    shape=(1,),
    mime_type=MIMEType.WEBM,
    )

    yield f"{label}_{video_sub_path}", {
    "video": data,
    "label": label,
    }

    Audio

    音频类型。

    Audio(
    fp: _TArtifactFP = "",
    display_name: str = "",
    mime_type: Optional[MIMEType] = None,
    )
    参数说明
    fp音频文件的路径、IO对象或文件内容的bytes
    display_nameDataset Viewer上展示的名字
    mime_typeMIMEType支持的类型

    使用示例

    import typing as t
    from starwhale import Audio

    def iter_item() -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    for path in validation_ds_paths:
    with path.open() as f:
    for item in f.readlines():
    item = item.strip()
    if not item:
    continue

    data_path = dataset_dir / item
    data = Audio(
    data_path, display_name=item, shape=(1,), mime_type=MIMEType.WAV
    )

    speaker_id, utterance_num = data_path.stem.split("_nohash_")
    annotations = {
    "label": data_path.parent.name,
    "speaker_id": speaker_id,
    "utterance_num": int(utterance_num),
    }
    yield data, annotations

    Audio函数

    Audio.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Audio.carry_raw_data

    carry_raw_data() -> Audio

    Audio.astype

    astype() -> Dict[str, t.Any]

    Text

    文本类型,默认为 utf-8 格式。

    Text(
    content: str,
    encoding: str = "utf-8",
    )
    参数说明
    contenttext内容
    encodingtext的编码格式

    使用示例

    import typing as t
    from pathlib import Path
    from starwhale import Text

    def iter_item(self) -> t.Generator[t.Tuple[t.Any, t.Any], None, None]:
    root_dir = Path(__file__).parent.parent / "data"

    with (root_dir / "fra-test.txt").open("r") as f:
    for line in f.readlines():
    line = line.strip()
    if not line or line.startswith("CC-BY"):
    continue

    _data, _label, *_ = line.split("\t")
    data = Text(_data, encoding="utf-8")
    annotations = {"label": _label}
    yield data, annotations

    Text函数

    Text.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Text.carry_raw_data

    carry_raw_data() -> Text

    Text.astype

    astype() -> Dict[str, t.Any]

    Text.to_str

    to_str() -> str

    Binary

    二进制类型,用bytes存储。

    Binary(
    fp: _TArtifactFP = "",
    mime_type: MIMEType = MIMEType.UNDEFINED,
    )
    参数说明
    fp路径、IO对象或文件内容的bytes
    mime_typeMIMEType支持的类型

    Binary函数

    Binary.to_bytes

    to_bytes(encoding: str= "utf-8") -> bytes

    Binary.carry_raw_data

    carry_raw_data() -> Binary

    Binary.astype

    astype() -> Dict[str, t.Any]

    Link

    Link类型,用来制作 remote-link 类型的数据集。

    Link(
    uri: str,
    auth: Optional[LinkAuth] = DefaultS3LinkAuth,
    offset: int = 0,
    size: int = -1,
    data_type: Optional[BaseArtifact] = None,
    )
    参数说明
    uri原始数据的uri地址,目前支持localFS和S3两种协议
    authLink Auth信息
    offset数据相对uri指向的文件偏移量
    size数据大小
    data_typeLink指向的实际数据类型,目前支持 Binary, Image, Text, AudioVideo 类型

    Link函数

    Link.astype

    astype() -> Dict[str, t.Any]

    MIMEType

    描述Starwhale支持的多媒体类型,用Python Enum类型实现,用在 ImageVideo 等类型的mime_type 属性上,能更好的进行Dataset Viewer。

    class MIMEType(Enum):
    PNG = "image/png"
    JPEG = "image/jpeg"
    WEBP = "image/webp"
    SVG = "image/svg+xml"
    GIF = "image/gif"
    APNG = "image/apng"
    AVIF = "image/avif"
    PPM = "image/x-portable-pixmap"
    MP4 = "video/mp4"
    AVI = "video/avi"
    WEBM = "video/webm"
    WAV = "audio/wav"
    MP3 = "audio/mp3"
    PLAIN = "text/plain"
    CSV = "text/csv"
    HTML = "text/html"
    GRAYSCALE = "x/grayscale"
    UNDEFINED = "x/undefined"

    Line

    描述直线。

    from starwhale import dataset, Point, Line

    with dataset("collections") as ds:
    line_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0)
    ]
    ds.append({"line": line_points})
    ds.commit()

    Point

    描述点。

    from starwhale import dataset, Point

    with dataset("collections") as ds:
    ds.append(Point(x=0.0, y=100.0))
    ds.commit()

    Polygon

    描述多边形。

    from starwhale import dataset, Point, Polygon

    with dataset("collections") as ds:
    polygon_points = [
    Point(x=0.0, y=1.0),
    Point(x=0.0, y=100.0),
    Point(x=2.0, y=1.0),
    Point(x=2.0, y=100.0),
    ]
    ds.append({"polygon": polygon_points})
    ds.commit()
    - + \ No newline at end of file diff --git a/zh/reference/swcli/dataset/index.html b/zh/reference/swcli/dataset/index.html index e7d862cd6..b1fb6eeb9 100644 --- a/zh/reference/swcli/dataset/index.html +++ b/zh/reference/swcli/dataset/index.html @@ -10,7 +10,7 @@ - + @@ -21,7 +21,7 @@ | --page | N | Integer | 1 | 起始页码,仅限Server和Cloud实例。 | | --size | N | Integer | 20 | 一页中的数据集数量,仅限Server和Cloud实例。 | | --filter-fl | N | String | | 仅显示符合条件的数据集。该选项可以在一个命令中被多次重复使用。 |

    过滤器类型说明范例
    nameKey-Value数据集名称前缀--filter name=mnist
    ownerKey-Value数据集所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli dataset recover

    swcli [全局选项] dataset recover [选项] <DATASET>

    dataset recover 恢复以前删除的Starwhale数据集或版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 数据集或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale数据集或版本会被强制覆盖。

    swcli dataset remove

    swcli [全局选项] dataset remove [选项] <DATASET>

    dataset remove 删除指定的 Starwhale 数据集或某个版本。

    DATASET 是一个数据集URI。如果URI不包含版本,则删除指定数据集的所有版本。软删除的 Starwhale 数据集,可以通过 swcli dataset recover 命令进行恢复(未进行垃圾回收)。

    被删除的Starwhale数据集或版本可以通过 swcli dataset list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale数据集或版本。删除后不可恢复。

    swcli dataset summary

    swcli [全局选项] dataset summary <DATASET>

    显示数据集摘要信息。DATASET 是一个数据集URI

    swcli dataset tag

    swcli [全局选项] dataset tag [选项] <DATASET> [TAGS]...

    dataset tag 将标签附加到指定的Starwhale数据集版本,同时支持删除和列出所有标签的功能。可以在数据集URI中使用标签替代版本ID。

    DATASET是一个数据集URI

    每个数据集版本可以包含任意数量的标签,但同一数据集中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的数据集已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    数据集标签的例子

    #- list tags of the mnist dataset
    swcli dataset tag mnist

    #- add tags for the mnist dataset
    swcli dataset tag mnist t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist/version/latest t1 --force-add
    swcli dataset tag mnist t1 --quiet

    #- remove tags for the mnist dataset
    swcli dataset tag mnist -r t1 t2
    swcli dataset tag cloud://cloud.starwhale.cn/project/public:starwhale/dataset/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/reference/swcli/index.html b/zh/reference/swcli/index.html index 300a5123b..c222204c9 100644 --- a/zh/reference/swcli/index.html +++ b/zh/reference/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    概述

    使用方式

    swcli [选项] <COMMAND> [参数]...
    备注

    swcli、sw 和 starwhale 三个命令的作用是一样的。

    全局选项

    选项说明
    --version显示swcli的版本信息。
    --verbose-v日志中输出更多信息,当 -v 参数越多,呈现信息越多,最多支持4个 -v 参数。
    --help输出命令帮助信息。
    警告

    需要注意的是,全局参数需要跟在swcli之后,命令之前。

    命令

    - + \ No newline at end of file diff --git a/zh/reference/swcli/instance/index.html b/zh/reference/swcli/instance/index.html index 199f0fb13..3d2a4928d 100644 --- a/zh/reference/swcli/instance/index.html +++ b/zh/reference/swcli/instance/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    swcli instance

    概述

    swcli [全局选项] instance [选项] <SUBCOMMAND> [参数]

    instance命令包括以下子命令:

    • info
    • list (ls)
    • login
    • logout
    • use (select)

    swcli instance info

    swcli [全局选项] instance info [选项] <INSTANCE>

    instance info 输出指定 Starwhale 实例的详细信息。

    INSTANCE 是一个实例URI

    swcli instance list

    swcli [全局选项] instance list [选项]

    instance list 显示所有的 Starwhale 实例。

    swcli instance login

    swcli [全局选项] instance login [选项] <INSTANCE>

    instance login 连接到一个 Server/Cloud 实例并将它设置为默认实例.

    INSTANCE 是一个实例URI

    选项必填项类型默认值说明
    --usernameNString登录用户名
    --passwordNString登录密码
    --tokenNString登录令牌
    --aliasYString实例别名。您可以在任何需要实例URI的地方使用对应的别名替代。

    --username--password 不能和 --token 一起使用。

    swcli instance logout

    swcli [全局选项] instance logout [INSTANCE]

    instance logout 断开和 Server/Cloud 实例的连接并清除本地保存的信息。

    INSTANCE是一个实例URI。如果不指定,将使用默认实例

    swcli instance use

    swcli [全局选项] instance use <INSTANCE>

    instance use 将指定的实例设置为默认实例.

    INSTANCE 是一个实例URI

    - + \ No newline at end of file diff --git a/zh/reference/swcli/job/index.html b/zh/reference/swcli/job/index.html index 7dc9c9a0d..204ba821e 100644 --- a/zh/reference/swcli/job/index.html +++ b/zh/reference/swcli/job/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    swcli job

    概述

    swcli [全局选项] job [选项] <子命令> [参数]...

    job命令包括以下子命令:

    • cancel
    • info
    • list(ls)
    • pause
    • recover
    • remove(rm)
    • resume

    swcli job cancel

    swcli [全局选项] job cancel [选项] <JOB>

    job cancel 停止指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job info

    swcli [全局选项] job info [选项] <JOB>

    job info 输出指定作业的详细信息。

    JOB 是一个作业URI

    swcli job list

    swcli [全局选项] job list [选项]

    job list显示所有的 Starwhale 作业。

    选项必填项类型默认值说明
    --projectNString要查看的项目的 URI。如果未指定此选项,则使用默认项目替代。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的作业。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的作业数。仅限 Server 和 Cloud 实例。

    swcli job pause

    swcli [全局选项] job pause [选项] <JOB>

    job pause 暂停指定的作业. 被暂停的作业可以使用 job resume 恢复。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    pausecancel 功能上基本相同。它们的差别在于被暂停的作业会保留作业ID,在恢复时继续使用。作业的开发者需要定期保存作业数据并在恢复的时候重新加载相关数据。作业ID 可以用作保存数据的键值。

    选项必填项类型默认值说明
    --force or -fNBooleanFalse如果为真,强制停止指定的作业。

    swcli job resume

    swcli [全局选项] job resume [选项] <JOB>

    job resume 恢复指定的作业。该命令在 Standalone 实例下只对容器方式运行的作业生效。

    JOB 是一个作业URI

    - + \ No newline at end of file diff --git a/zh/reference/swcli/model/index.html b/zh/reference/swcli/model/index.html index d84626adf..bbcb6dcb0 100644 --- a/zh/reference/swcli/model/index.html +++ b/zh/reference/swcli/model/index.html @@ -10,14 +10,14 @@ - +
    版本:0.6.4

    swcli model

    概述

    swcli [全局选项] model [选项] <SUBCOMMAND> [参数]...

    model命令包括以下子命令:

    • build
    • copy(cp)
    • diff
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • run
    • serve
    • tag

    swcli model build

    swcli [全局选项] model build [选项] <WORKDIR>

    model build 会将整个 WORKDIR 打包到Starwhale模型中,.swignore匹配的文件除外。

    model build 会导入 --module 参数指定的模块,然后生成运行模型所需要的配置。如果您指定的模块依赖第三方库,我们强烈建议您使用 --runtime 选项。如果不指定该选项,您需要确保 swcli 所使用的 Python 环境已经安装了相关的依赖库。

    选项必填项类型默认值说明
    --project-pNString默认项目项目URI
    --model-yaml-fNString${workdir}/model.yamlmodel.yaml 文件路径,默认会尝试使用 ${workdir}/model.yaml 文件。model.yaml 对于模型构建并非必需的。
    --module-mNString构建时导入的模块。Starwhale 会将这些模块中包含的 handler 导出到模型包。该参数可以指定多次,用来导入多个 Python 模块。
    --runtimeNString运行此命令时使用的 Starwhale Runtime的URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --name-nNString模型包的名字
    --desc-dNString模型包的描述
    --package-runtime--no-package-runtimeNBooleanTrue当使用 --runtime 参数时,默认情况下,会将对应的 Starwhale 运行时变成 Starwhale 模型的内置运行时。可以通过 --no-package-runtime 参数禁用该特性。
    --add-allNBooleanFalseStarwhale 模型打包的时候会自动忽略一些类似 pyc/venv/conda 构建目录等,可以通过该参数将这些文件也进行打包。即使该参数使用,也不影响 .swignore 文件的预期作用。
    -t--tagN全局String

    Starwhale 模型构建的例子

    # build by the model.yaml in current directory and model package will package all the files from the current directory.
    swcli model build .
    # search model run decorators from mnist.evaluate, mnist.train and mnist.predict modules, then package all the files from the current directory to model package.
    swcli model build . --module mnist.evaluate --module mnist.train --module mnist.predict
    # build model package in the Starwhale Runtime environment.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1
    # forbid to package Starwhale Runtime into the model.
    swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
    # build model package with tags.
    swcli model build . --tag tag1 --tag tag2

    swcli model copy

    swcli [全局选项] model copy [选项] <SRC> <DEST>

    model copy 将模型从 SRC 复制到 DEST,用来实现不同实例的模型分享。这里 SRCDEST 都是模型URI

    Starwhale 模型复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 模型复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a new model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with the cloud instance model name 'mnist-cloud'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local default project(self) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/model/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud model to local project(myproject) with a model name 'mnist-local'
    swcli model cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with a new model name 'mnist-cloud'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local model to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli model cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local model to cloud instance(pre-k8s) mnist project with standalone instance model name 'mnist-local'
    swcli model cp local/project/myproject/model/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli model cp mnist cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli model diff

    swcli [全局选项] model diff [选项] <MODEL VERSION> <MODEL VERSION>

    model diff 比较同一模型的两个版本之间的差异。

    MODEL VERSION 是一个模型URI

    选项必填项类型默认值说明
    --show-detailsNBooleanFalse使用该选项输出详细的差异信息。

    swcli model extract

    swcli [全局选项] model extract [选项] <MODEL> <TARGET_DIR>

    model extract 能够将 Starwhale 模型解压到指定目录中,方便进行后续改造。

    MODEL 是一个模型URI

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,会强制覆盖目标目录已经存在的模型解压文件。

    Starwhale 模型解压的例子

    #- extract mnist model package to current directory
    swcli model extract mnist/version/xxxx .

    #- extract mnist model package to current directory and force to overwrite the files
    swcli model extract mnist/version/xxxx . -f

    swcli model history

    swcli [全局选项] model history [选项] <MODEL>

    model history输出指定Starwhale模型的所有历史版本。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli model info

    swcli [全局选项] model info [选项] <MODEL>

    model info输出指定Starwhale模型版本的详细信息。

    MODEL是一个模型URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/model_yaml/manifest/files/handlers/all]basic设置输出的过滤规则,比如只显示Model的model.yaml。目前该参数仅对Standalone Instance的Model生效。

    Starwhale 模型信息查看的例子

    swcli model info mnist # show basic info from the latest version of model
    swcli model info mnist/version/v0 # show basic info from the v0 version of model
    swcli model info mnist/version/latest --output-filter=all # show all info
    swcli model info mnist -of basic # show basic info
    swcli model info mnist -of model_yaml # show model.yaml
    swcli model info mnist -of handlers # show model runnable handlers info
    swcli model info mnist -of files # show model package files tree
    swcli -o json model info mnist -of all # show all info in json format

    swcli model list

    swcli [全局选项] model list [选项]

    model list显示所有的Starwhale模型。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的模型。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的模型数。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的模型。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value模型名称前缀--filter name=mnist
    ownerKey-Value模型所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli model recover

    swcli [全局选项] model recover [选项] <MODEL>

    model recover 恢复以前删除的 Starwhale 模型或版本。

    MODEL是一个模型URI。如果 URI 不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 模型或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的 Starwhale 模型或版本会被强制覆盖。

    swcli model remove

    swcli [全局选项] model remove [选项] <MODEL>

    model remove 删除指定的 Starwhale 模型或某个版本。

    MODEL 是一个模型URI。如果URI不包含版本,则删除指定模型的所有版本。

    被删除的 Starwhale 模型或版本可以在垃圾回收之前通过 swcli model recover 恢复。要永久删除某个Starwhale模型或版本,您可以使用 --force 选项。

    被删除的 Starwhale 模型或版本可以通过 swcli model list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个Starwhale模型或版本。删除后不可恢复。

    swcli model run

    swcli [全局选项] model run [选项]

    model run 运行一个模型的 Handler。该命令提供两种模式: model URI模式和本地开发模式。 model URI模式需要一个预先构建好的模型包,本地开发模式仅需要 model 代码目录即可。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在model URI模式中使用,指定 model URI。
    --handler-hNString运行的Handler索引或名字,默认运行第一个Handler。格式为序号或Handler的名字。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model run 是非必须的。
    --run-project-pNString默认的 ProjectProject URI,表示 model run 的结果存储到对应的项目中。
    --dataset-dNStringDataset URI,模型运行所需要的 Starwhale 数据集。该参数可以被设置多次。
    --dataset-head-dhNInteger0在 Standalone Instance 下,用于调试目的,一般只需要运行数据集的一部分数据即可,可以通过 --dataset-head 参数来设定。默认值为0,表示会使用数据集所有数据。
    --in-containerNBooleanFalse使用docker镜像来运行模型。此选项仅适用于 Standalone 实例。Server 和 Cloud 实例始终使用 docker 镜像。如果指定的 runtime 是基于 docker 镜像构建的,此选项总是为真。
    --forbid-snapshot-fsNBooleanFalse当在model URI模式下,每次模型运行,都会使用一个全新的快照目录,设置该参数后直接使用模型的 workdir 目录作为运行目录。本地开发模式下,此参数不生效,每次运行都是在 --workdir 指定的目录中。
    -- --user-arbitrary-argsNString为你在 handlers 中预设的参数赋值。

    Starwhale 模型运行的例子

    # --> run by model uri
    # run the first handler from model uri
    swcli model run -u mnist/version/latest
    # run index id(1) handler from model uri
    swcli model run --uri mnist/version/latest --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from model uri
    swcli model run --uri mnist/version/latest --handler mnist.evaluator:MNISTInference.cmp

    # --> run by the working directory, which does not build model package yet. Make local debug happy.
    # run the first handler from the working directory, use the model.yaml in the working directory
    swcli model run -w .
    # run index id(1) handler from the working directory, search mnist.evaluator module and model.yaml handlers(if existed) to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler 1
    # run index fullname(mnist.evaluator:MNISTInference.cmp) handler from the working directory, search mnist.evaluator module to get runnable handlers
    swcli model run --workdir . --module mnist.evaluator --handler mnist.evaluator:MNISTInference.cmp
    # run the f handler in th.py from the working directory with the args defined in th:f
    # @handler()
    # def f(
    # x=ListInput(IntInput()),
    # y=2,
    # mi=MyInput(),
    # ds=DatasetInput(required=True),
    # ctx=ContextInput(),
    # )
    swcli model run -w . -m th --handler th:f -- -x 2 -x=1 --mi=blab-la --ds mnist

    # --> run with dataset of head 10
    swcli model run --uri mnist --dataset-head 10 --dataset mnist

    swcli model serve

    swcli [全局选项] model serve [选项]

    model serve 命令可以以Web Server方式运行模型,并提供简易的 Web 交互界面。

    选项必填项类型默认值说明
    --workdir-wNString在本地开发模式中使用,指定 model 代码目录地址。
    --uri-uNString在 model URI模式中使用,指定 model URI。
    --runtime-rNString运行此命令时使用的Starwhale Runtime的 URI。如果指定此选项,该命令将在 Starwhale 运行时指定的独立 Python 环境中运行。否则它将直接在 swcli 当前的 Python 环境中运行。
    --model-yaml-fNString${MODEL_DIR}/model.yamlmodel.yaml 的路径。model.yaml 对于 model serve 是非必须的。
    --module-mNStringPython Module 的名字,是一个可以被 import 的 Python Module 路径。该参数可以被设置多次。
    --hostNString127.0.0.1服务监听的地址
    --portNInteger8080服务监听的端口

    Starwhale 模型 Serving 的例子

    swcli model serve -u mnist
    swcli model serve --uri mnist/version/latest --runtime pytorch/version/latest

    swcli model serve --workdir . --runtime pytorch/version/v0
    swcli model serve --workdir . --runtime pytorch/version/v1 --host 0.0.0.0 --port 8080
    swcli model serve --workdir . --runtime pytorch --module mnist.evaluator

    swcli model tag

    swcli [全局选项] model tag [选项] <MODEL> [TAGS]...

    model tag将标签附加到指定的Starwhale模型版本,同时支持删除和列出所有标签的功能。可以在模型URI中使用标签替代版本ID。

    MODEL是一个模型URI

    每个模型版本可以包含任意数量的标签,但同一模型中不允许有重复的标签名称。

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的模型已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 模型标签的例子

    #- list tags of the mnist model
    swcli model tag mnist

    #- add tags for the mnist model
    swcli model tag mnist t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist/version/latest t1 --force-add
    swcli model tag mnist t1 --quiet

    #- remove tags for the mnist model
    swcli model tag mnist -r t1 t2
    swcli model tag cloud://cloud.starwhale.cn/project/public:starwhale/model/mnist --remove t1
    - + \ No newline at end of file diff --git a/zh/reference/swcli/project/index.html b/zh/reference/swcli/project/index.html index 78c78e7e4..6cc12056c 100644 --- a/zh/reference/swcli/project/index.html +++ b/zh/reference/swcli/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    swcli project

    概述

    swcli [全局选项] project [选项] <子命令> [参数]...

    project命令包括以下子命令:

    • create(add, new)
    • info
    • list(ls)
    • recover
    • remove(rm)
    • use(select)

    swcli project create

    swcli [全局选项] project create <PROJECT>

    project create 创建一个新的项目。

    PROJECT 是一个项目URI

    swcli project info

    swcli [全局选项] project info [选项] <PROJECT>

    project info 输出指定项目的详细信息。

    PROJECT 是一个项目URI

    swcli project list

    swcli [全局选项] project list [选项]

    project list 显示所有的项目。

    选项必填项类型默认值说明
    --instanceNString要显示的实例 URI。如果不指定该选项,则显示默认实例.
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的项目。
    --pageNInteger1起始页码。仅限 Server 和 Cloud 实例。
    --sizeNInteger20一页中的项目数。仅限 Server 和 Cloud 实例。

    swcli project recover

    swcli [全局选项] project recover [选项] <PROJECT>

    project recover 恢复以前删除的项目。

    PROJECT 是一个项目URI

    已经被垃圾回收或者使用 --force 选项删除的项目无法使用本命令恢复。

    swcli project remove

    swcli [全局选项] project remove [选项] <PROJECT>

    project remove 删除指定的项目。

    PROJECT 是一个项目URI

    被删除的项目可以在垃圾回收之前通过 swcli project recover 恢复。要永久删除某个项目,您可以使用 --force 选项。

    被删除的项目可以通过 swcli project list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 模型或版本。删除后不可恢复。

    swcli project use

    swcli [全局选项] project use <PROJECT>

    project use 将指定的项目设置为默认项目。如果要指定 Server/Cloud 实例上的项目,您需要先登录才能运行本命令。

    - + \ No newline at end of file diff --git a/zh/reference/swcli/runtime/index.html b/zh/reference/swcli/runtime/index.html index 2e82e791b..697ada2dc 100644 --- a/zh/reference/swcli/runtime/index.html +++ b/zh/reference/swcli/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    swcli runtime

    概述

    swcli [全局选项] runtime [选项] <SUBCOMMAND> [参数]...

    runtime 命令包括以下子命令:

    • activate(actv)
    • build
    • copy(cp)
    • dockerize
    • extract
    • history
    • info
    • list(ls)
    • recover
    • remove(rm)
    • tag

    swcli runtime activate

    swcli [全局选项] runtime activate [选项] <RUNTIME>

    runtime activate 根据指定的运行时创建一个全新的 Python 环境,类似 source venv/bin/activateconda activate xxx 的效果。关闭当前 shell 或切换到其他 shell 后,需要重新激活 Runtime。URI 参数为 Runtime URI。

    对于已经激活的 Starwhale 运行时,如果想要退出该环境,需要在 venv 环境中执行 deactivate 命令或conda环境中执行 conda deactivate 命令。

    runtime activate 命令首次激活环境的时候,会根据 Starwhale 运行时的定义,构建一个 Python 隔离环境,并下载相关的 Python Packages ,可能会花费比较长的时间。

    swcli runtime build

    swcli [全局选项] runtime build [选项]

    runtime build 命令可以从多种环境或 runtime.yaml ,构建一个可以分享、可以复现的适合 ML/DL 领域的运行环境。

    参数说明

    • 运行时构建方式的相关参数:
    选项必填项类型默认值说明
    -c--condaNString通过 conda env name 寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -cp--conda-prefixNString通过 conda env prefix 路径寻找对应的 conda 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -v--venvNString通过 venv 目录地址寻找对应的 venv 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -s--shellNString根据当前 shell 环境,导出 Python 依赖后生成 Starwhale 运行时。
    -y--yamlNcwd 目录的 runtime.yaml根据用户自定义的 runtime.yaml 构建 Starwhale 运行时。
    -d--dockerNString将 docker image 作为 Starwhale 运行时。

    运行时构建方式的相关参数是互斥的,只能指定一种方式,如果不指定,则会采用 --yaml 方式读取 cwd 目录下的 runtime.yaml 文件进行 Starwhale 运行时的构建。

    • 其他参数:
    选项必填项作用域类型默认值说明
    --project-pN全局String默认项目项目URI
    -del--disable-env-lockNruntime.yaml 模式BooleanFalse是否安装 runtime.yaml 中的依赖,并锁定相关依赖的版本信息。默认会锁定依赖。
    -nc--no-cacheNruntime.yaml 模式BooleanFalse是否删除隔离环境,全新安装相关依赖。默认会在之前的隔离环境中安装依赖。
    --cudaNconda/venv/shell 模式Choice[11.3/11.4/11.5/11.6/11.7/]CUDA 版本,默认不使用 CUDA。
    --cudnnNconda/venv/shell 模式Choice[8/]cuDNN 版本,默认不使用 cuDNN。
    --archNconda/venv/shell 模式Choice[amd64/arm64/noarch]noarch体系结构
    -dpo--dump-pip-optionsN全局BooleanFalse~/.pip/pip.conf 导出 pip 的配置参数。
    -dcc--dump-condarcN全局BooleanFalse~/.condarc 导出 conda 的配置参数。
    -t--tagN全局String用户自定义标签,可以指定多次。

    Starwhale 运行时构建的例子

    #- from runtime.yaml:
    swcli runtime build # use the current directory as the workdir and use the default runtime.yaml file
    swcli runtime build -y example/pytorch/runtime.yaml # use example/pytorch/runtime.yaml as the runtime.yaml file
    swcli runtime build --yaml runtime.yaml # use runtime.yaml at the current directory as the runtime.yaml file
    swcli runtime build --tag tag1 --tag tag2

    #- from conda name:
    swcli runtime build -c pytorch # lock pytorch conda environment and use `pytorch` as the runtime name
    swcli runtime build --conda pytorch --name pytorch-runtime # use `pytorch-runtime` as the runtime name
    swcli runtime build --conda pytorch --cuda 11.4 # specify the cuda version
    swcli runtime build --conda pytorch --arch noarch # specify the system architecture

    #- from conda prefix path:
    swcli runtime build --conda-prefix /home/starwhale/anaconda3/envs/pytorch # get conda prefix path by `conda info --envs` command

    #- from venv prefix path:
    swcli runtime build -v /home/starwhale/.virtualenvs/pytorch
    swcli runtime build --venv /home/starwhale/.local/share/virtualenvs/pytorch --arch amd64

    #- from docker image:
    swcli runtime build --docker pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime # use the docker image as the runtime directly

    #- from shell:
    swcli runtime build -s --cuda 11.4 --cudnn 8 # specify the cuda and cudnn version
    swcli runtime build --shell --name pytorch-runtime # lock the current shell environment and use `pytorch-runtime` as the runtime name

    swcli runtime copy

    swcli [全局选项] runtime copy [选项] <SRC> <DEST>

    runtime copy 将 runtime 从 SRC 复制到 DEST,可以实现不同实例之间的运行时分享。这里 SRCDEST 都是运行时URI

    Starwhale 运行时复制时,默认会带上用户自定义的所有标签,可以使用 --ignore-tag 参数,忽略某些标签。另外,latest^v\d+$ 标签是 Starwhale 系统内建标签,只在当前实例中使用,不会拷贝到其他实例中。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果为true,DEST已经存在时会被强制覆盖。否则此命令会显示一条错误消息。另外,如果复制时携带的标签已经被其他版本使用,通过该参数可以强制更新标签到此版本上。
    -i--ignore-tagNString可以指定多次,忽略多个用户自定义标签。

    Starwhale 运行时复制的例子

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a new runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq local/project/myproject/mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq .

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with the cloud instance runtime name 'mnist-cloud'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq . -dlp myproject

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local default project(self) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/runtime/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local

    #- copy cloud instance(pre-k8s) mnist project's mnist-cloud runtime to local project(myproject) with a runtime name 'mnist-local'
    swcli runtime cp cloud://pre-k8s/project/mnist/mnist-cloud/version/ge3tkylgha2tenrtmftdgyjzni3dayq mnist-local -dlp myproject

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with a new runtime name 'mnist-cloud'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist/mnist-cloud

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy standalone instance(local) default project(self)'s mnist-local runtime to cloud instance(pre-k8s) mnist project without 'cloud://' prefix
    swcli runtime cp mnist-local/version/latest pre-k8s/project/mnist

    #- copy standalone instance(local) project(myproject)'s mnist-local runtime to cloud instance(pre-k8s) mnist project with standalone instance runtime name 'mnist-local'
    swcli runtime cp local/project/myproject/runtime/mnist-local/version/latest cloud://pre-k8s/project/mnist

    #- copy without some tags
    swcli runtime cp pytorch cloud://cloud.starwhale.cn/project/starwhale:public --ignore-tag t1

    swcli runtime dockerize

    swcli [全局选项] runtime dockerize [选项] <RUNTIME>

    runtime dockerize 基于指定的 runtime 创建一个 docker 镜像。Starwhale 使用 docker buildx 来创建镜像。运行此命令需要预先安装 Docker 19.03 以上的版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --tag or -tNStringDocker镜像的tag,该选项可以重复多次。
    --pushNBooleanFalse是否将创建的镜像推送到docker registry。
    --platformNStringamd64镜像的运行平台,可以是amd64或者arm64。该选项可以重复多次用于创建多平台镜像。
    --dry-runNBooleanFalse只生成 Dockerfile 不实际生成和推送镜像。

    swcli runtime extract

    swcli [全局选项] runtime extract [选项] <RUNTIME>

    Starwhale 运行时以压缩包的方式分发,使用 runtime extract 命令可以解压运行时 Package,然后进行后续的自定义修改。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果目标目录已经有解压好的 Starwhale 运行时,是否删除后重新解压。
    --target-dirNString自定义解压的目录,如果不指定则会放到 Starwhale 默认的运行时 workdir 目录中,命令输出日志中会提示。

    swcli runtime history

    swcli [全局选项] runtime history [选项] <RUNTIME>

    runtime history输出指定Starwhale运行时的所有历史版本。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。

    swcli runtime info

    swcli [全局选项] runtime info [选项] <RUNTIME>

    runtime info输出指定Starwhale运行时版本的详细信息。

    RUNTIME是一个运行时URI

    选项必填项类型默认值说明
    --output-filter-ofNChoice of [basic/runtime_yaml/manifest/lock/all]basic设置输出的过滤规则,比如只显示Runtime的runtime.yaml。目前该参数仅对Standalone Instance的Runtime生效。

    Starwhale 运行时查看详情的例子

    swcli runtime info pytorch # show basic info from the latest version of runtime
    swcli runtime info pytorch/version/v0 # show basic info
    swcli runtime info pytorch/version/v0 --output-filter basic # show basic info
    swcli runtime info pytorch/version/v1 -of runtime_yaml # show runtime.yaml content
    swcli runtime info pytorch/version/v1 -of lock # show auto lock file content
    swcli runtime info pytorch/version/v1 -of manifest # show _manifest.yaml content
    swcli runtime info pytorch/version/v1 -of all # show all info of the runtime

    swcli runtime list

    swcli [全局选项] runtime list [选项]

    runtime list显示所有的 Starwhale 运行时。

    选项必填项类型默认值说明
    --projectNString要查看的项目的URI。如果未指定此选项,则使用默认项目替代。
    --fullnameNBooleanFalse显示完整的版本名称。如果没有使用该选项,则仅显示前 12 个字符。
    --show-removedNBooleanFalse如果使用了该选项,则结果中会包含已删除但未被垃圾回收的运行时。
    --pageNInteger1起始页码。仅限Server和Cloud实例。
    --sizeNInteger20一页中的运行时数量。仅限Server和Cloud实例。
    --filter-flNString仅显示符合条件的运行时。该选项可以在一个命令中被多次重复使用。
    过滤器类型说明范例
    nameKey-Value运行时名称前缀--filter name=pytorch
    ownerKey-Value运行时所有者名字--filter owner=starwhale
    latestFlag如果指定了该选项,结果中仅显示最新版本。--filter latest

    swcli runtime recover

    swcli [全局选项] runtime recover [选项] <RUNTIME>

    runtime recover 命令可以恢复以前删除的 Starwhale 运行时。

    RUNTIME是一个运行时URI。如果URI不包含版本,则会恢复所有删除的版本。

    已经被垃圾回收或者使用 --force 选项删除的 Starwhale 运行时或版本无法使用本命令恢复。

    选项必填项类型默认值说明
    --force-fNBooleanFalse如果使用了该选项,当前同名的Starwhale运行时或版本会被强制覆盖。

    swcli runtime remove

    swcli [全局选项] runtime remove [选项] <RUNTIME>

    runtime remove 命令可以删除指定的 Starwhale 运行时或某个版本。

    RUNTIME 是一个运行时URI。如果 URI 不包含版本,则删除所有版本。

    被删除的 Starwhale 运行时或版本可以在垃圾回收之前通过 swcli runtime recover 命令恢复。要永久删除某个 Starwhale 运行时或版本,您可以使用 --force 选项。

    被删除的 Starwhale 运行时或版本可以通过 swcli runtime list --show-removed 列出。

    选项必填项类型默认值说明
    --force-fNBooleanFalse使用此选项永久删除某个 Starwhale 运行时或版本。删除后不可恢复。

    swcli runtime tag

    swcli [全局选项] runtime tag [选项] <RUNTIME> [TAGS]...

    runtime tag 命令将标签附加到指定的 Starwhale 运行时版本,同时支持删除和列出所有标签的功能。可以在运行时URI中使用标签替代版本 ID。

    RUNTIME 是一个运行时URI

    每个运行时版本可以包含任意数量的标签,但同一运行时中不允许有重复的标签名称。

    runtime tag仅适用于 Standalone 实例.

    选项必填项类型默认值说明
    --remove-rNBooleanFalse使用该选项删除标签
    --quiet-qNBooleanFalse使用该选项以忽略错误,例如删除不存在的标签。
    --force-add-fNBooleanFalse当向 server/cloud 实例中添加标签时,若遇到其他版本的运行时已经使用该标签会提示报错,强制更新时可以使用 --force-add 参数。

    Starwhale 运行时标签的例子

    #- list tags of the pytorch runtime
    swcli runtime tag pytorch

    #- add tags for the pytorch runtime
    swcli runtime tag mnist t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch/version/latest t1 --force-add
    swcli runtime tag mnist t1 --quiet

    #- remove tags for the pytorch runtime
    swcli runtime tag mnist -r t1 t2
    swcli runtime tag cloud://cloud.starwhale.cn/project/public:starwhale/runtime/pytorch --remove t1
    - + \ No newline at end of file diff --git a/zh/reference/swcli/utilities/index.html b/zh/reference/swcli/utilities/index.html index 3f4762e99..cc9dcf338 100644 --- a/zh/reference/swcli/utilities/index.html +++ b/zh/reference/swcli/utilities/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    其他命令

    swcli gc

    swcli [全局选项] gc [选项]

    gc根据内部的垃圾回收策略清理已经被删除的项目、模型、数据集和运行时。

    选项必填项类型默认值说明
    --dry-runNBooleanFalse如果为真,仅输出将被删除的对象而不清理。
    --yesNBooleanFalse跳过所有需要确认的项目。

    swcli check

    swcli [全局选项] check

    检查 swcli 命令的外部依赖是否满足条件,目前主要检查 Docker 和 Conda。

    swcli completion install

    swcli [全局选项] completion install <SHELL_NAME>

    安装 swcli 命令补全,目前支持 bash, zsh 和 fish。如果不指定 SHELL_NAME,则尝试主动探测当前shell类型。

    swcli config edit

    swcli [全局选项] config edit

    编辑 Starwhale 配置文件,即 ~/.config/starwhale/config.yaml

    swcli ui

    swcli [全局选项] ui <INSTANCE>

    打开对应实例的Web页面。

    - + \ No newline at end of file diff --git a/zh/runtime/index.html b/zh/runtime/index.html index ad2953515..42aabafa0 100644 --- a/zh/runtime/index.html +++ b/zh/runtime/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 运行时

    overview

    概览

    Starwhale 运行时能够针对运行Python程序,提供一种可复现、可分享的运行环境。使用 Starwhale 运行时,可以非常容易的与他人分享,并且能在 Starwhale Server 和 Starwhale Cloud 实例上使用 Starwhale 运行时。

    Starwhale 运行时使用 venv, conda 和 docker 等基础技术,如果您当前正在使用这些技术,可以非常容易的将这个环境转化为 Starwhale 运行时。

    对于本地环境,Starwhale 运行时支持非常容易的多种环境管理和切换。Starwhale 运行时包含基础镜像和环境依赖两个部分。

    基础镜像

    Starwhale 基础镜像中会安装 Python, CUDA, cuDNN 和其他一些机器学习开发中必要的基础库。Starwhale 运行时提供多种基础镜像供选择,列表如下:

    • 体系结构:
      • X86 (amd64)
      • Arm (aarch64)
    • 操作系统:
      • Ubuntu 20.04 LTS (ubuntu:20.04)
    • Python:
      • 3.7
      • 3.8
      • 3.9
      • 3.10
      • 3.11
    • CUDA:
      • CUDA 11.3 + cuDNN 8.4
      • CUDA 11.4 + cuDNN 8.4
      • CUDA 11.5 + cuDNN 8.4
      • CUDA 11.6 + cuDNN 8.4
      • CUDA 11.7

    runtime.yaml 通过相关设置来决定使用何种基础镜像。

    - + \ No newline at end of file diff --git a/zh/runtime/yaml/index.html b/zh/runtime/yaml/index.html index 3889302a5..d11f491b8 100644 --- a/zh/runtime/yaml/index.html +++ b/zh/runtime/yaml/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    runtime.yaml 使用指南

    runtime.yaml 是构建 Starwhale 运行时的描述文件,用户可以细粒度的定义 Starwhale 运行时的各种属性。当使用 swcli runtime build 命令中 yaml 模式时,需要提供 runtime.yaml 文件。

    使用示例

    最简示例

    dependencies:
    - pip:
    - numpy
    name: simple-test

    定义一个以 venv 作为Python 包隔离方式,安装numpy依赖的 Starwhale 运行时。

    llama2 示例

    name: llama2
    mode: venv
    environment:
    arch: noarch
    os: ubuntu:20.04
    cuda: 11.7
    python: "3.10"
    dependencies:
    - pip:
    - torch
    - fairscale
    - fire
    - sentencepiece
    - gradio >= 3.37.0
    # external starwhale dependencies
    - starwhale[serve] >= 0.5.5

    完整字段示例

    # [required]The name of Starwhale Runtime
    name: demo
    # [optional]The mode of Starwhale Runtime: venv or conda. Default is venv.
    mode: venv
    # [optional]The configurations of pip and conda.
    configs:
    # If you do not use conda, ignore this field.
    conda:
    condarc: # custom condarc config file
    channels:
    - defaults
    show_channel_urls: true
    default_channels:
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
    - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
    custom_channels:
    conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
    nvidia: https://mirrors.aliyun.com/anaconda/cloud
    ssl_verify: false
    default_threads: 10
    pip:
    # pip config set global.index-url
    index_url: https://example.org/
    # pip config set global.extra-index-url
    extra_index_url: https://another.net/
    # pip config set install.trusted-host
    trusted_host:
    - example.org
    - another.net
    # [optional] The definition of the environment.
    environment:
    # Now it must be ubuntu:20.04
    os: ubuntu:20.04
    # CUDA version. possible values: 11.3, 11.4, 11.5, 11.6, 11.7
    cuda: 11.4
    # Python version. possible values: 3.7, 3.8, 3.9, 3.10, 3.11
    python: 3.8
    # Define your custom base image
    docker:
    image: mycustom.com/docker/image:tag
    # [required] The dependencies of the Starwhale Runtime.
    dependencies:
    # If this item is present, conda env create -f conda.yml will be executed
    - conda.yaml
    # If this item is present, pip install -r requirements.txt will be executed before installing other pip packages
    - requirements.txt
    # Packages to be install with conda. venv mode will ignore the conda field.
    - conda:
    - numpy
    - requests
    # Packages to be installed with pip. The format is the same as requirements.txt
    - pip:
    - pillow
    - numpy
    - deepspeed==0.9.0
    - safetensors==0.3.0
    - transformers @ git+https://github.com/huggingface/transformers.git@3c3108972af74246bc3a0ecf3259fd2eafbacdef
    - peft @ git+https://github.com/huggingface/peft.git@fcff23f005fc7bfb816ad1f55360442c170cd5f5
    - accelerate @ git+https://github.com/huggingface/accelerate.git@eba6eb79dc2ab652cd8b44b37165a4852768a8ac
    # Additional wheels packages to be installed when restoring the runtime
    - wheels:
    - dummy-0.0.0-py3-none-any.whl
    # Additional files to be included in the runtime
    - files:
    - dest: bin/prepare.sh
    name: prepare
    src: scripts/prepare.sh
    # Run some custom commands
    - commands:
    - apt-get install -y libgl1
    - touch /tmp/runtime-command-run.flag
    - + \ No newline at end of file diff --git a/zh/server/guides/server_admin/index.html b/zh/server/guides/server_admin/index.html index ae0af71f9..7524cea7a 100644 --- a/zh/server/guides/server_admin/index.html +++ b/zh/server/guides/server_admin/index.html @@ -10,14 +10,14 @@ - +
    版本:0.6.4

    Starwhale Server 系统设置

    超级管理员密码重置

    一旦您忘记了超级管理员的密码, 您可以通过下面的SQL语句将密码重置为 abcd1234

    update user_info set user_pwd='ee9533077d01d2d65a4efdb41129a91e', user_pwd_salt='6ea18d595773ccc2beacce26' where id=1

    重置后,您可以使用上述密码登录到console。 然后再次修改密码为您想要的密码。

    系统设置

    您可以在 Starwhale Server Web 界面中对系统设置进行更改,目前支持runtime的docker镜像源修改以及资源池的划分等。下面是系统设置的一个例子:

    dockerSetting:
    registryForPull: "docker-registry.starwhale.cn/star-whale"
    registryForPush: ""
    userName: ""
    password: ""
    insecure: true
    pypiSetting:
    indexUrl: ""
    extraIndexUrl: ""
    trustedHost: ""
    retries: 10
    timeout: 90
    imageBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    datasetBuild:
    resourcePool: ""
    image: ""
    clientVersion: ""
    pythonVersion: ""
    resourcePoolSetting:
    - name: "default"
    nodeSelector: null
    resources:
    - name: "cpu"
    max: null
    min: null
    defaults: 5.0
    - name: "memory"
    max: null
    min: null
    defaults: 3145728.0
    - name: "nvidia.com/gpu"
    max: null
    min: null
    defaults: null
    tolerations: null
    metadata: null
    isPrivate: null
    visibleUserIds: null
    storageSetting:
    - type: "minio"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"
    - type: "s3"
    tokens:
    bucket: "users"
    ak: "starwhale"
    sk: "starwhale"
    endpoint: "http://10.131.0.1:9000"
    region: "local"
    hugeFileThreshold: "10485760"
    hugeFilePartSize: "5242880"

    镜像源设置

    Server 下发的 Tasks 都是基于 docker 实现的,Starwhale Server 支持自定义镜像源,包括 dockerSetting.registryForPush 和 dockerSetting.registryForPull。

    资源池设置

    资源池实现了集群机器分组的功能。用户在创建任务时可以通过选择资源池将自己的任务下发到想要的机器组中。资源池可以理解为 Kubernetes 中的 nodeSelector,所以当您在K8S集群中给机器打上标签后,就可以在这里配置您的 resourcePool

    存储设置

    您可以通过存储设置来配置 Starwhale Server 可以访问哪些存储介质:

    storageSetting:
    - type: s3
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://s3.region.amazonaws.com # optional
    region: region of the service # required when endpoint is empty
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: minio
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.1:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload
    - type: aliyun
    tokens:
    - bucket: starwhale # required
    ak: access_key # required
    sk: secret_key # required
    endpoint: http://10.131.0.2:9000 # required
    region: local # optional
    hugeFileThreshold: 10485760 # bigger than 10MB will use multiple part upload
    hugeFilePartSize: 5242880 # MB part size for multiple part upload

    每一个 storageSetting 条目都应该有一个StorageAccessService接口的实现. Starwhale目前有四个内置的实现:

    • StorageAccessServiceAliyun 可以处理 type 为 aliyun 或者 oss 的条目
    • StorageAccessServiceMinio 可以处理 type 为 minio 的条目
    • StorageAccessServiceS3 可以处理 type 为 s3 的条目
    • StorageAccessServiceFile 可以处理 type 为 fs 或者 file 的条目

    不同的实现对 tokens 的要求是不一样的. 当 type 为 aliyun、minio 或者 oss 的时候 endpoint 是必填的。 当 endpoint 为空并且 type 为 s3 的时候 region 是必填的。 而 fs/file 类型的存储则需要 rootDir 和 serviceProvider 作为 tokens 的 key. 更多细节请参阅代码。

    - + \ No newline at end of file diff --git a/zh/server/index.html b/zh/server/index.html index f1927eed9..215c2933f 100644 --- a/zh/server/index.html +++ b/zh/server/index.html @@ -10,13 +10,13 @@ - + - + \ No newline at end of file diff --git a/zh/server/installation/docker-compose/index.html b/zh/server/installation/docker-compose/index.html index 7a898e73f..c92a214f7 100644 --- a/zh/server/installation/docker-compose/index.html +++ b/zh/server/installation/docker-compose/index.html @@ -10,14 +10,14 @@ - +
    版本:0.6.4

    使用Docker Compose安装Starwhale

    先决条件

    安装方法

    启动服务

    wget https://raw.githubusercontent.com/star-whale/starwhale/main/docker/compose/compose.yaml
    GLOBAL_IP=${your_accessible_ip_for_server} ; docker compose up

    GLOBAL_IP 需要是可以被所有 swcli 访问到的,包括用户实际使用的swcli和container内部的swcli. 如果不能访问,请确认您的防火墙设置.

    compose.yaml 包含了Mysql数据库,MinIO存储和Controller服务. 创建一个 compose.override.yaml, 可以覆盖 compose.yaml 中的配置. 如何配置可以参考此处

    - + \ No newline at end of file diff --git a/zh/server/installation/docker/index.html b/zh/server/installation/docker/index.html index 280500f66..64c730349 100644 --- a/zh/server/installation/docker/index.html +++ b/zh/server/installation/docker/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    使用 Docker 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。

    请确保您的Kubernetes集群上的pod可以访问Starwhale Server侦听的端口。

    为Docker准备env文件

    Starwhale Server可以通过环境变量进行配置。

    Docker的env文件模板参考此处。您可以通过修改模板来创建自己的env文件。

    准备kubeconfig文件[可选][SW_SCHEDULER=k8s]

    kubeconfig文件用于访问Kubernetes集群。 有关kubeconfig文件的更多信息,请参阅官方Kubernetes文档

    如果您安装了kubectl命令行工具,可以运行 kubectl config view 来查看您当前的配置。

    启动Docker镜像

    docker run -it -d --name starwhale-server -p 8082:8082 \
    --restart unless-stopped \
    --mount type=bind,source=<您的kubeconfig文件路径>,destination=/root/.kube/config,readonly \
    --env-file <您的env文件路径> \
    docker-registry.starwhale.cn/star-whale/server:0.5.6

    对于非中国大陆网络用户,可以使用托管在 ghcr.io 上的镜像: ghcr.io/star-whale/server

    - + \ No newline at end of file diff --git a/zh/server/installation/helm-charts/index.html b/zh/server/installation/helm-charts/index.html index b0cd70871..4183418a0 100644 --- a/zh/server/installation/helm-charts/index.html +++ b/zh/server/installation/helm-charts/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    使用 Helm 安装 Starwhale Server

    先决条件

    • 1.19或者更高版本的Kubernetes集群用于执行任务。
    • MySQL 8.0以上版本的数据库实例用于存储元数据。
    • 兼容S3接口的对象存储,用于保存数据集、模型等。
    • Helm 3.2.0+。

    Starwhale Helm charts 包括 MySQL 和 MinIO 作为依赖项。如果您没有自己的 MySQL 实例或任何与 AWS S3 兼容的对象存储可用,可以通过 Helm Chats 进行安装。请查看下文的安装选项以了解如何在安装 Starwhale Server 的同时安装 MySQL 和 MinIO。

    在 Kubernetes 上为 Starwhale Server 创建一个服务账号

    如果您的 Kubernetes 集群启用了 RBAC(在 Kubernetes 1.6+中,默认启用 RBAC),Starwhale Server 将无法正常工作,除非由至少具有以下权限的服务帐户启动:

    Resource | API Group | Get | List | Watch | Create | Delete
    jobs     | batch     | Y   | Y    | Y     | Y      | Y
    pods     | core      | Y   | Y    | Y     |        |
    nodes    | core      | Y   | Y    | Y     |        |
    events   | ""        |     |      | Y     |        |

    例子:

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
    name: starwhale-role
    rules:
    - apiGroups:
    - ""
    resources:
    - pods
    - nodes
    verbs:
    - get
    - list
    - watch
    - apiGroups:
    - "batch"
    resources:
    - jobs
    verbs:
    - create
    - get
    - list
    - watch
    - delete
    - apiGroups:
    - ""
    resources:
    - events
    verbs:
    - get
    - watch
    - list
    ---
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
    metadata:
    name: starwhale-binding
    roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: starwhale-role
    subjects:
    - kind: ServiceAccount
    name: starwhale

    下载 Starwhale Helm chart

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update

    安装Starwhale Server

    helm install starwhale-server starwhale/starwhale-server -n starwhale --create-namespace

    如果您安装了kubectl命令行工具,您可以运行 kubectl get pods -n starwhale 来检查是否所有 pod 都在正常运行中。

    更新 Starwhale Server

    helm repo update
    helm upgrade starwhale-server starwhale/starwhale-server

    卸载 Starwhale Server

    helm delete starwhale-server
    - + \ No newline at end of file diff --git a/zh/server/installation/index.html b/zh/server/installation/index.html index ca54ae46b..b0ab6c1cd 100644 --- a/zh/server/installation/index.html +++ b/zh/server/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale Server 安装指南

    Starwhale Server 以 Docker 镜像的形式发布。您可以直接使用 Docker 运行,也可以部署到 Kubernetes 集群上。

    - + \ No newline at end of file diff --git a/zh/server/installation/minikube/index.html b/zh/server/installation/minikube/index.html index 4f1530e93..570ca1935 100644 --- a/zh/server/installation/minikube/index.html +++ b/zh/server/installation/minikube/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    使用 Minikube 安装 Starwhale Server

    先决条件

    启动 Minikube

    minikube start --kubernetes-version=1.25.3 --image-repository=docker-registry.starwhale.cn/minikube --base-image=docker-registry.starwhale.cn/minikube/k8s-minikube/kicbase:v0.0.42

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=ingress-nginx/kube-webhook-certgen:v20231011-8b53cabe0,IngressController=ingress-nginx/controller:v1.9.4"

    目前 docker-registry.starwhale.cn/minikube 只缓存了 Kubernetes 1.25.3 的镜像,也可以使用阿里云提供的 Minikube 镜像:

    minikube start --image-mirror-country=cn

    minikube addons enable ingress --images="KubeWebhookCertgenPatch=kube-webhook-certgen:v20231011-8b53cabe0,KubeWebhookCertgenCreate=kube-webhook-certgen:v20231011-8b53cabe0,IngressController=nginx-ingress-controller:v1.9.4" --registries="KubeWebhookCertgenPatch=registry.cn-hangzhou.aliyuncs.com/google_containers,KubeWebhookCertgenCreate=registry.cn-hangzhou.aliyuncs.com/google_containers,IngressController=registry.cn-hangzhou.aliyuncs.com/google_containers"

    对于非中国大陆网络用户,可以使用如下命令:

    minikube start --addons ingress

    如果在您的机器上没有安装 kubectl,可以使用 Minikube 自带的 kubectl: minikube kubectl 或 bashrc中增加 alias kubectl="minikube kubectl --"

    安装 Starwhale Server

    helm repo add starwhale https://star-whale.github.io/charts
    helm repo update
    helm pull starwhale/starwhale --untar --untardir ./charts

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.cn.yaml

    对于非中国大陆网络用户,可以使用 values.minikube.global.yaml,命令如下:

    helm upgrade --install starwhale ./charts/starwhale -n starwhale --create-namespace -f ./charts/starwhale/values.minikube.global.yaml

    当成功安装后,会有类似如下的提示信息输出:

        Release "starwhale" has been upgraded. Happy Helming!
    NAME: starwhale
    LAST DEPLOYED: Tue Feb 14 16:25:03 2023
    NAMESPACE: starwhale
    STATUS: deployed
    REVISION: 14
    NOTES:
    ******************************************
    Chart Name: starwhale
    Chart Version: 0.5.6
    App Version: latest
    Starwhale Image:
    - server: ghcr.io/star-whale/server:latest

    ******************************************
    Controller:
    - visit: http://controller.starwhale.svc
    Minio:
    - web visit: http://minio.starwhale.svc
    - admin visit: http://minio-admin.starwhale.svc
    MySQL:
    - port-forward:
    - run: kubectl port-forward --namespace starwhale svc/mysql 3306:3306
    - visit: mysql -h 127.0.0.1 -P 3306 -ustarwhale -pstarwhale
    Please run the following command for the domains searching:
    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc " | sudo tee -a /etc/hosts
    ******************************************
    Login Info:
    - starwhale: u:starwhale, p:abcd1234
    - minio admin: u:minioadmin, p:minioadmin

    *_* Enjoy to use Starwhale Platform. *_*

    检查 Starwhale Server 状态

    Minikube 方式启动 Starwhale Server 一般要用时3-5分钟,可以输出如下命令检查是否完成启动:

    kubectl get deployments -n starwhale
    NAME         READY   UP-TO-DATE   AVAILABLE   AGE
    controller   1/1     1            1           5m
    minio        1/1     1            1           5m
    mysql        1/1     1            1           5m

    本机访问的网络配置

    输出如下命令后,就可以在浏览器中通过 http://controller.starwhale.svc 访问 Starwhale Server:

    echo "$(sudo minikube ip) controller.starwhale.svc minio.starwhale.svc  minio-admin.starwhale.svc " | sudo tee -a /etc/hosts

    其他机器访问的网络配置

    • 步骤1: 在 Starwhale Server 所在机器上

      使用 socat 命令做临时的端口转发,命令如下:

      # install socat at first, ref: https://howtoinstall.co/en/socat
      sudo socat TCP4-LISTEN:80,fork,reuseaddr,bind=0.0.0.0 TCP4:`minikube ip`:80

      当您停掉socat进程后,端口转发会被禁止,其他机器的访问也会被禁止。如果想长期开启端口转发,可以使用 iptables 命令。

    • 步骤2: 在其他机器上

      在 hosts 文件添加相关域名映射,命令如下:

      # for macOSX or Linux environment, run the command in the shell.
      echo "${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc" | sudo tee -a /etc/hosts

      # for Windows environment, run the command in the PowerShell with administrator permission.
      Add-Content -Path C:\Windows\System32\drivers\etc\hosts -Value "`n${your_machine_ip} controller.starwhale.svc minio.starwhale.svc minio-admin.starwhale.svc"
    - + \ No newline at end of file diff --git a/zh/server/installation/starwhale_env/index.html b/zh/server/installation/starwhale_env/index.html index 1acf14417..9a70dfbc7 100644 --- a/zh/server/installation/starwhale_env/index.html +++ b/zh/server/installation/starwhale_env/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 环境变量文件示例

    ################################################################################
    # *** Required ***
    # The external Starwhale server URL. For example: https://cloud.starwhale.ai
    SW_INSTANCE_URI=

    # The listening port of Starwhale Server
    SW_CONTROLLER_PORT=8082

    # The maximum upload file size. This setting affects datasets and models uploading when copied from outside.
    SW_UPLOAD_MAX_FILE_SIZE=20480MB
    ################################################################################
    # The base URL of the Python Package Index to use when creating a runtime environment.
    SW_PYPI_INDEX_URL=http://10.131.0.1/repository/pypi-hosted/simple/

    # Extra URLs of package indexes to use in addition to the base url.
    SW_PYPI_EXTRA_INDEX_URL=

    # Space separated hostnames. When any host specified in the base URL or extra URLs does not have a valid SSL
    # certification, use this option to trust it anyway.
    SW_PYPI_TRUSTED_HOST=
    ################################################################################
    # The JWT token expiration time. When the token expires, the server will request the user to login again.
    SW_JWT_TOKEN_EXPIRE_MINUTES=43200

    # *** Required ***
    # The JWT secret key. All strings are valid, but we strongly recommend you to use a random string with at least 16 characters.
    SW_JWT_SECRET=
    ################################################################################
    # The scheduler controller to use. Valid values are:
    # docker: Controller schedule jobs by leveraging docker
    # k8s: Controller schedule jobs by leveraging Kubernetes
    SW_SCHEDULER=k8s

    # The Kubernetes namespace to use when running a task when SW_SCHEDULER is k8s
    SW_K8S_NAME_SPACE=default

    # The path on the Kubernetes host node's filesystem to cache Python packages. Use the setting only if you have
    # the permission to use host node's filesystem. The runtime environment setup process may be accelerated when the host
    # path cache is used. Leave it blank if you do not want to use it.
    SW_K8S_HOST_PATH_FOR_CACHE=

    # The ip for the containers created by Controller when SW_SCHEDULER is docker
    SW_DOCKER_CONTAINER_NODE_IP=127.0.0.1
    ###############################################################################
    # *** Required ***
    # The object storage system type. Valid values are:
    # s3: [AWS S3](https://aws.amazon.com/s3) or other s3-compatible object storage systems
    # aliyun: [Aliyun OSS](https://www.alibabacloud.com/product/object-storage-service)
    # minio: [MinIO](https://min.io)
    # file: Local filesystem
    SW_STORAGE_TYPE=

    # The path prefix for all data saved on the storage system.
    SW_STORAGE_PREFIX=
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is file.

    # The root directory to save data.
    # This setting is only used when SW_STORAGE_TYPE is file.
    SW_STORAGE_FS_ROOT_DIR=/usr/local/starwhale
    ################################################################################
    # The following settings are only used when SW_STORAGE_TYPE is not file.

    # *** Required ***
    # The name of the bucket to save data.
    SW_STORAGE_BUCKET=

    # *** Required ***
    # The endpoint URL of the object storage service.
    # This setting is only used when SW_STORAGE_TYPE is s3 or aliyun.
    SW_STORAGE_ENDPOINT=

    # *** Required ***
    # The access key used to access the object storage system.
    SW_STORAGE_ACCESSKEY=

    # *** Required ***
    # The secret access key used to access the object storage system.
    SW_STORAGE_SECRETKEY=

    # *** Optional ***
    # The region of the object storage system.
    SW_STORAGE_REGION=

    # Starwhale Server will use multipart upload when uploading a large file. This setting specifies the part size.
    SW_STORAGE_PART_SIZE=5MB
    ################################################################################
    # MySQL settings

    # *** Required ***
    # The hostname/IP of the MySQL server.
    SW_METADATA_STORAGE_IP=

    # The port of the MySQL server.
    SW_METADATA_STORAGE_PORT=3306

    # *** Required ***
    # The database used by Starwhale Server
    SW_METADATA_STORAGE_DB=starwhale

    # *** Required ***
    # The username of the MySQL server.
    SW_METADATA_STORAGE_USER=

    # *** Required ***
    # The password of the MySQL server.
    SW_METADATA_STORAGE_PASSWORD=
    ################################################################################

    # 用于缓存WAL文件的目录。请将其指向一个有足够空间的挂载卷或主机路径。
    # 如果不设置,WAL文件将保存在docker运行时层,当容器重启时cache数据将丢失。
    SW_DATASTORE_WAL_LOCAL_CACHE_DIR=
    - + \ No newline at end of file diff --git a/zh/server/project/index.html b/zh/server/project/index.html index 2d27fec85..64ea663b0 100644 --- a/zh/server/project/index.html +++ b/zh/server/project/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    How to Organize and Manage Resources with Starwhale Projects

    Project is the basic unit for organizing and managing resources (such as models, datasets, runtime environments, etc.). You can create and manage projects based on your needs. For example, you can create projects by business team, product line, or models. One user can create and participate in one or more projects.

    Project type

    There are two types of projects:

    • Private project: The project (and related resources in the project) is only visible to project members with permission. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    • Public project: The project (and related resources in the project) is visible to all Starwhale users. Project members can view or edit the project (as well as associated resources in the project). For more information on roles, please take a look at Roles and permissions in Starwhale.

    Create a project

    1. Click the Create button in the upper right corner of the project list page;
    2. Enter a name for the project. Pay attention to avoiding duplicate names. For more information, please see Names in Starwhale
    3. Select the Project Type, which is defaulted to private project and can be selected as public according to needs;
    4. Fill in the description content;
    5. To finish, Click the Submit button.

    Edit a project

    The name, privacy and description of a project can be edited.

    1. Go to the project list page and find the project that needs to be edited by searching for the project name, then click the Edit Project button;
    2. Edit the items that need to be edited;
    3. Click Submit to save the edited content;
    4. If you're editing multiple projects, repeat steps 1 through 3.

    View a project

    My projects

    On the project list page, only my projects are displayed by default. My projects refer to the projects participated in by the current users as project members or project owners.

    Project sorting

    On the project list page, all projects are supported to be sorted by "Recently visited", "Project creation time from new to old", and "Project creation time from old to new", which can be selected according to your needs.

    Delete a project

    Once a project is deleted, all related resources (such as datasets, models, runtimes, evaluations, etc.) will be deleted and cannot be restored.

    1. Enter the project list page and search for the project name to find the project that needs to be deleted. Hover your mouse over the project you want to delete, then click the Delete button;
    2. Follow the prompts, enter the relevant information, click Confirm to delete the project, or click Cancel to cancel the deletion;
    3. If you are deleting multiple projects, repeat the above steps.

    Manage project member

    Only users with the admin role can assign people to the project. The project owner defaulted to having the project owner role.

    Add a member

    1. Click Manage Members to go to the project member list page;
    2. Click the Add Member button in the upper right corner.
    3. Enter the Username you want to add, select a project role for the user in the project.
    4. Click submit to complete.
    5. If you're adding multiple members, repeat steps 1 through 4.

    Remove a member

    1. On the project list page or project overview tab, click Manage Members to go to the project member list page.
    2. Search for the username you want to delete, then click the Delete button.
    3. Click Yes to delete the user from this project, click No to cancel the deletion.
    4. If you're removing multiple members, repeat steps 1 through 3.

    Edit a member's role

    1. Hover your mouse over the project you want to edit, then click Manage Members to go to the project member list page.
    2. Find the username you want to adjust through searching, click the Project Role drop-down menu, and select a new project role. For more information on roles, please take a look at Roles and permissions in Starwhale.
    - + \ No newline at end of file diff --git a/zh/swcli/config/index.html b/zh/swcli/config/index.html index d8c9826cc..fa33f9679 100644 --- a/zh/swcli/config/index.html +++ b/zh/swcli/config/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    配置文件

    Standalone Instance 是安装在用户的笔记本或开发服务器上,以Linux/Mac用户为粒度进行隔离。用户通过 pip 命令安装 Starwhale Python package 并执行任意 swcli 命令后,就可以在 ~/.config/starwhale/config.yaml 中查看该用户的 Starwhale 配置。绝大多数情况下用户不需要手工修改config.yaml文件

    ~/.config/starwhale/config.yaml 文件权限为 0o600,由于里面存有密钥信息,不建议用户修改该文件权限。您可以通过 swcli config edit 来修改配置:

    swcli config edit

    config.yaml 例子

    典型的 config.yaml 文件内容如下:

    • 当前默认默认 Instance 为 local。
    • cloud-cn/cloud-k8s/pre-k8s 三个为 Cloud Instance,local 为 Standalone Instance。
    • Standalone 本地存储的根目录为 /home/liutianwei/.starwhale
    current_instance: local
    instances:
    cloud-cn:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-28 18:41:05 CST
    uri: https://cloud.starwhale.cn
    user_name: starwhale
    user_role: normal
    cloud-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 16:10:01 CST
    uri: http://cloud.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    local:
    current_project: self
    type: standalone
    updated_at: 2022-06-09 16:14:02 CST
    uri: local
    user_name: liutianwei
    pre-k8s:
    sw_token: ${TOKEN}
    type: cloud
    updated_at: 2022-09-19 18:06:50 CST
    uri: http://console.pre.intra.starwhale.ai
    user_name: starwhale
    user_role: normal
    link_auths:
    - ak: starwhale
    bucket: users
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale
    type: s3
    storage:
    root: /home/liutianwei/.starwhale
    version: '2.0'

    config.yaml 字段说明

    参数说明类型默认值是否必须
    current_instance默认使用的instance名字,一般用 swcli instance select 命令设置Stringself
    instances管理的 Instances,包括 Standalone, Server 和 Cloud Instance,至少会有 Standalone Instance(名称为local),Server/Cloud Instance有一个或多个,一般用 swcli instance login 登陆一个新的instance,swcli instance logout 退出一个instanceDictStandalone Instance,名称为local
    instances.{instance-alias-name}.sw_token登陆Token,只对Server/Cloud Instance生效,后续swcli对Server/Cloud Instance进行操作时都会使用该Token。需要注意Token有过期时间,默认1个月,可以在Server/Cloud Instance中进行设置StringCloud-是,Standalone-否
    instances.{instance-alias-name}.typeinstance类型,目前只能填写 cloudstandaloneChoice[String]
    instances.{instance-alias-name}.uri对于Server/Cloud Instance,uri是http/https地址,对于Standalone Instance,uri是 localString
    instances.{instance-alias-name}.user_name用户名String
    instances.{instance-alias-name}.current_project当前Instance下默认的Project是什么,在URI的表述中会作为project字段进行默认填充,可以通过 swcli project select 命令进行设置String
    instances.{instance-alias-name}.user_role用户角色Stringnormal
    instances.{instance-alias-name}.updated_at该条Instance配置更新时间时间格式字符串
    storage与本地存储相关的设置Dict
    storage.rootStandalone Instance本地存储的根目录。通常情况下,当home目录空间不足,手工把数据文件移动到其他位置时,可以修改该字段String~/.starwhale
    versionconfig.yaml的版本,目前仅支持2.0String2.0

    Standalone Instance 的文件存储结构

    ${storage.root} 目录中存储了 Standalone Instance 所有的用户数据,包括 Project、Runtime、Model、Dataset、Evaluation 等用户直接感知的数据,也包括 ObjectStore、DataStore 等 Starwhale 后台实现的存储。具体说明如下:

    +-- ${storage.root}
    | +-- .objectstore --> 存储数据集chunk文件的简单存储,使用blake2b hash算法
    | | +-- blake2b --> hash算法名称
    | | | +-- 00 --> hash2位前缀
    | | | | +-- 0019ad58... --> object文件,文件名是文件内容的hash值
    | | | +-- 05
    | +-- .datastore --> 基于pyarrow的列式存储
    | | +-- project
    | | | +-- self --> 按照project名称进行分类存储
    | | | | +-- dataset --> 数据集相关的datastore存储,一般用来存储数据集的索引信息
    | | | | +-- eval --> 模型评测结果存储
    | +-- .recover --> 软删除某个project的存储目录,可以用 `swcli project recover` 进行恢复
    | +-- .tmp --> Dataset/Model/Runtime 构建过程中临时目录
    | +-- myproject --> 用户创建的project,所有myproject信息都存储在该目录
    | +-- self --> Standalone Instance自动创建的project
    | | +-- dataset --> swds数据集存储目录
    | | +-- evaluation --> 模型评测配置文件、日志等存储目录
    | | +-- model --> swmp模型包存储目录
    | | +-- runtime --> swrt环境包存储目录
    | | +-- workdir --> 解压、复原包文件的目录
    | | | +-- model --> swmp解压后的目录
    | | | +-- runtime --> swrt解压后的目录,若进行runtime restore操作,生成的venv或conda隔离环境,也会存放在该目录中

    有时候您可能需要用到 starwhale.Link 来存储一些信息。理论上,Link里面的URI可以是任意的合法 URI(星鲸目前只支持S3协议族和HTTP),比如s3://10.131.0.1:9000/users/path。然而,有些 Link是需要鉴权才能访问的。 link_auths 就是用来存放这些鉴权信息的。

    link_auths:
    - type: s3
    ak: starwhale
    bucket: users
    region: local
    connect_timeout: 10.0
    endpoint: http://10.131.0.1:9000
    read_timeout: 100.0
    sk: starwhale

    link_auths 里面的每一条都会自动匹配您的URI。 目前 S3 类型的鉴权信息通过 bucketendpoint 来匹配 URI。

    - + \ No newline at end of file diff --git a/zh/swcli/index.html b/zh/swcli/index.html index 2229967f6..b71d6da40 100644 --- a/zh/swcli/index.html +++ b/zh/swcli/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale Client (swcli) 用户指南

    swcli 是一个命令行工具,可让您与 Starwhale 实例进行交互。您可以使用 swcli 完成 Starwhale 中几乎所有的任务。swcli 是用纯 Python3 编写的(需要 Python 3.7 ~ 3.11),因此可以通过 pip 命令轻松安装。目前,swcli 仅支持 Linux 和 macOS,Windows版本即将推出。

    - + \ No newline at end of file diff --git a/zh/swcli/installation/index.html b/zh/swcli/installation/index.html index ea915cb89..a49e16a1d 100644 --- a/zh/swcli/installation/index.html +++ b/zh/swcli/installation/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    安装指南

    swcli 命令行工具能够对各种实例完成几乎所有的操作,由于是由纯 Python3 编写,可以使用 pip 命令完成安装,本文会提供一些安装建议,帮助您获得一个干净的、无依赖冲突的 swcli Python 环境。

    安装建议

    非常不建议将 Starwhale 安装在系统的全局 Python 环境中,可能会导致 Python 的依赖冲突问题。使用 venv 或 conda 创建一个隔离的 Python 环境,并在其中安装 Starwhale,是 Python 推荐的做法。

    先决条件

    • Python3.7 ~ 3.11
    • Linux 或 macOS
    • Conda(可选)

    在Ubuntu系统中,可以运行以下命令:

    sudo apt-get install python3 python3-venv python3-pip

    #如果您想安装多个python版本
    sudo add-apt-repository -y ppa:deadsnakes/ppa
    sudo apt-get update
    sudo apt-get install -y python3.7 python3.8 python3.9 python3-pip python3-venv python3.8-venv python3.7-venv python3.9-venv

    swcli 可以在 macOS 下工作,包括 arm(M1 Chip) 和 x86(Intel Chip) 两种体系结构。但 macOS 下自带的 Python3 可能会遇到一些 Python 自身的问题,推荐使用 homebrew 进行安装:

    brew install python3

    安装 swcli

    使用venv安装

    venv 环境既可以使用 Python3 自带的 venv,也可以使用 virtualenv 工具。

    python3 -m venv ~/.cache/venv/starwhale
    source ~/.cache/venv/starwhale/bin/activate
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    使用conda安装

    conda create --name starwhale --yes  python=3.9
    conda activate starwhale
    python3 -m pip install starwhale

    swcli --version

    sudo ln -sf "$(which swcli)" /usr/local/bin/

    👏 现在,您可以在全局环境中使用 swcli 了。

    swcli 的特定场景依赖安装

    # 针对Audio处理, 主要包含soundfile库等
    python -m pip install starwhale[audio]

    # 针对Image处理,主要包含pillow库等
    python -m pip install starwhale[pillow]

    # 针对swcli model server命令
    python -m pip install starwhale[server]

    # 针对内建的Online Serving
    python -m pip install starwhale[online-serve]

    # 安装全部依赖
    python -m pip install starwhale[all]

    更新 swcli

    #适用于venv环境
    python3 -m pip install --upgrade starwhale

    #适用于conda环境
    conda run -n starwhale python3 -m pip install --upgrade starwhale

    卸载swcli

    python3 -m pip uninstall starwhale

    rm -rf ~/.config/starwhale
    rm -rf ~/.starwhale
    - + \ No newline at end of file diff --git a/zh/swcli/swignore/index.html b/zh/swcli/swignore/index.html index b095c6850..36efa0c59 100644 --- a/zh/swcli/swignore/index.html +++ b/zh/swcli/swignore/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    关于 .swignore 文件

    .swignore 文件与 .gitignore, .dockerignore 等文件类似,都是用来定义忽略某些文件或文件夹。.swignore 文件主要应用在 Starwhale 的模型构建过程中。默认情况下,swcli model build 命令 或 starwhale.model.build() Python SDK会遍历指定目录下的所有文件,并自动排除一些已知的、不适合放入模型包中的文件或目录。

    文件格式

    • swignore文件中的每一行指定一个匹配文件和目录的模式。
    • 空行不匹配任何文件,因此它可以作为可读性的分隔符。
    • 星号*匹配除斜杠以外的任何内容。
    • #开头的行作为注释。
    • 支持wildcard的表达,类似 *.jpg, *.png

    默认下自动排除的文件或目录

    如果不想排除这些文件,可以构建模型 (swcli model build 命令) 的时候增加 --add-all 参数。

    • __pycache__/
    • *.py[cod]
    • *$py.class
    • venv安装目录
    • conda安装目录

    例子

    这是MNIST示例中使用的.swignore文件:

    venv/*
    .git/*
    .history*
    .vscode/*
    .venv/*
    data/*
    .idea/*
    *.py[cod]
    - + \ No newline at end of file diff --git a/zh/swcli/uri/index.html b/zh/swcli/uri/index.html index 620bd8c9d..f65829197 100644 --- a/zh/swcli/uri/index.html +++ b/zh/swcli/uri/index.html @@ -10,13 +10,13 @@ - +
    版本:0.6.4

    Starwhale 资源URI

    提示

    资源 URI 在 Starwhale Client 中被广泛使用。URI 可以引用本地实例中的资源或远程实例中的任何其他资源。 这样 Starwhale Client 就可以轻松操作任何资源。

    concepts-org.jpg

    实例URI

    实例 URI 可以是以下形式之一:

    • local: 指本地的 Standalone 实例.
    • [http(s)://]<hostname or ip>[:<port>]:指向一个 Starwhale Cloud 实例。
    • [cloud://]<cloud alias>:Server或Cloud的实例别名,可以在实例登录阶段配置。
    警告

    “local”不同于“localhost”,前者为 Standalone 实例,而后者是一个 URL ,指向本地运行的 Starwhale Server 实例。

    例子:

    # 登录Starwhale Cloud,别名为swcloud
    swcli instance login --username <your account name> --password <your password> https://cloud.starwhale.cn --alias swcloud

    # 将模型从本地实例复制到云实例
    swcli model copy mnist/version/latest swcloud/project/<your account name>:demo

    # 将运行时复制到Starwhale Server实例:http://localhost:8081
    swcli runtime copy pytorch/version/v1 http://localhost:8081/project/<your account name>:demo

    项目URI

    项目URI的格式为“[<实例URI>/project/]<project name>”。 如果未指定实例 URI,则使用当前实例。

    例子:

    swcli project select self   # 选择当前实例中的self项目
    swcli project info local/project/self # 查看本地实例中的self项目信息

    模型/数据集/运行时URI

    • 模型URI: [<项目URI>/model/]<model name>[/version/<version id|tag>].
    • 数据集URI: [<项目URI>/dataset/]<dataset name>[/version/<version id|tag>].
    • 运行时URI: [<项目URI>/runtime/]<runtime name>[/version/<version id|tag>].
    提示
    • swcli 支持更加人性化的短版本ID。您可以只键入版本ID的前几个字符,前提是它至少有四个字符长且唯一指向某个版本ID。但是,recover 命令必须使用完整的版本ID。
    • 如果未指定项目URI,将使用默认项目
    • 您始终可以使用版本标签而不是版本ID。

    例子:

    swcli model info mnist/version/hbtdenjxgm4ggnrtmftdgyjzm43tioi  # 检查模型信息,模型名称:mnist,版本:hbtdenjxgm4ggnrtmftdgyjzm43tioi
    swcli model remove mnist/version/hbtdenj # 使用短版本ID
    swcli model info mnist # 检查mnist模型信息
    swcli model run mnist --runtime pytorch-mnist --dataset mnist # 使用latest的默认tag

    作业URI

    • 格式: [<项目URI>/job/]<job id>.
    • 如果未指定项目URI,将使用默认项目。

    例子:

    swcli job info mezdayjzge3w   # 查看默认实例和默认项目中 id 为 mezdayjzge3w 的作业
    swcli job info local/project/self/job/mezday # 检查本地实例,self项目,作业id:mezday

    默认实例

    当项目URI中的实例部分被省略时,将使用默认实例进行替代。默认实例是由 swcli instance login 或 swcli instance use 指定的。

    默认项目

    当模型/数据集/运行时/评估URI的项目部分被省略时,将使用默认项目。默认项目是指通过 swcli project use 命令选择的项目。

    - + \ No newline at end of file