From 6733e515455b37fa57a930a7e6545e45491b83cb Mon Sep 17 00:00:00 2001 From: jillian-maroket Date: Fri, 2 Feb 2024 08:41:24 +0000 Subject: [PATCH] deploy: 849582cf1e4aa15c58c5444f422bcba04ef83cde --- 404.html | 4 ++-- assets/js/{2f9a356c.ede4dc9d.js => 243a84ef.0b2fd61c.js} | 2 +- assets/js/243a84ef.b6a367e8.js | 1 - assets/js/2f9a356c.b42bd147.js | 1 + assets/js/{7a1ef0d5.c51cb8b4.js => 7a1ef0d5.c0159de7.js} | 2 +- .../{runtime~main.89f7ad10.js => runtime~main.f5155a29.js} | 2 +- authentication/index.html | 4 ++-- blog/archive/index.html | 4 ++-- development/dev-mode/index.html | 4 ++-- harvester-network/index.html | 4 ++-- import-image/index.html | 4 ++-- index.html | 4 ++-- installation/harvester-configuration/index.html | 4 ++-- installation/iso-install/index.html | 4 ++-- installation/pxe-boot-install/index.html | 4 ++-- intro/index.html | 4 ++-- kb/archive/index.html | 4 ++-- kb/atom.xml | 2 +- .../index.html | 4 ++-- kb/calculation_of_resource_metrics_in_harvester/index.html | 6 +++--- kb/configure_priority_class_longhorn/index.html | 4 ++-- kb/evicting-replicas-from-a-disk-the-cli-way/index.html | 4 ++-- kb/index.html | 6 +++--- kb/install_netapp_trident_csi/index.html | 4 ++-- kb/multiple-nics-vm-connectivity/index.html | 4 ++-- kb/nic-naming-scheme/index.html | 4 ++-- kb/package_your_own_toolbox_image/index.html | 4 ++-- kb/page/2/index.html | 4 ++-- kb/rss.xml | 2 +- kb/scan-and-repair-vm-root-filesystem/index.html | 4 ++-- kb/tags/best-practices/index.html | 4 ++-- kb/tags/calculation/index.html | 6 +++--- kb/tags/ceph/index.html | 4 ++-- kb/tags/cloud-provider/index.html | 4 ++-- kb/tags/configuration/index.html | 4 ++-- kb/tags/container/index.html | 4 ++-- kb/tags/csi/index.html | 4 ++-- kb/tags/debug/index.html | 4 ++-- kb/tags/disk-performance/index.html | 4 ++-- kb/tags/disk/index.html | 4 ++-- kb/tags/filesystem/index.html | 4 ++-- kb/tags/harvester/index.html | 6 +++--- kb/tags/index.html | 4 ++-- kb/tags/ip-pool/index.html | 4 ++-- kb/tags/live-migration/index.html | 4 ++-- kb/tags/load-balancer/index.html | 4 ++-- kb/tags/longhorn/index.html | 4 ++-- kb/tags/network/index.html | 4 ++-- kb/tags/policy/index.html | 4 ++-- kb/tags/priority-class/index.html | 4 ++-- kb/tags/reserved-resource/index.html | 6 +++--- kb/tags/resource-metrics/index.html | 6 +++--- kb/tags/rook/index.html | 4 ++-- kb/tags/root/index.html | 4 ++-- kb/tags/scheduling/index.html | 4 ++-- kb/tags/storage/index.html | 4 ++-- kb/tags/strategy/index.html | 4 ++-- kb/tags/upgrade/index.html | 4 ++-- kb/tags/virtual-machine/index.html | 4 ++-- kb/tags/vm/index.html | 4 ++-- .../index.html | 4 ++-- kb/use_rook_ceph_external_storage/index.html | 4 ++-- kb/vm-scheduling/index.html | 4 ++-- kb/vm_live_migration_policy_and_configuration/index.html | 4 ++-- markdown-page/index.html | 4 ++-- rancher-intergration/node-driver/index.html | 4 ++-- rancher-intergration/rancher-integration/index.html | 4 ++-- upgrade/index.html | 4 ++-- vm-management/access-to-the-vm/index.html | 4 ++-- vm-management/backup-restore/index.html | 4 ++-- vm-management/create-vm/index.html | 4 ++-- vm-management/live-migration/index.html | 4 ++-- 72 files changed, 142 insertions(+), 142 deletions(-) rename assets/js/{2f9a356c.ede4dc9d.js => 243a84ef.0b2fd61c.js} (65%) delete mode 100644 assets/js/243a84ef.b6a367e8.js create mode 100644 assets/js/2f9a356c.b42bd147.js rename assets/js/{7a1ef0d5.c51cb8b4.js => 7a1ef0d5.c0159de7.js} (96%) rename assets/js/{runtime~main.89f7ad10.js => runtime~main.f5155a29.js} (97%) diff --git a/404.html 
b/404.html index 69cc34b4..542d5d62 100644 --- a/404.html +++ b/404.html @@ -9,13 +9,13 @@ Page Not Found | The open-source hyperconverged infrastructure solution for a cloud-native world - +
Skip to main content

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

- + \ No newline at end of file diff --git a/assets/js/2f9a356c.ede4dc9d.js b/assets/js/243a84ef.0b2fd61c.js similarity index 65% rename from assets/js/2f9a356c.ede4dc9d.js rename to assets/js/243a84ef.0b2fd61c.js index af0d1f36..6f186fdc 100644 --- a/assets/js/2f9a356c.ede4dc9d.js +++ b/assets/js/243a84ef.0b2fd61c.js @@ -1 +1 @@ -"use strict";(self.webpackChunkharvesterhci_io=self.webpackChunkharvesterhci_io||[]).push([[9398],{3905:function(e,t,r){r.d(t,{Zo:function(){return u},kt:function(){return m}});var n=r(7294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function s(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var c=n.createContext({}),l=function(e){var t=n.useContext(c),r=t;return e&&(r="function"==typeof e?e(t):s(s({},t),e)),r},u=function(e){var t=l(e.components);return n.createElement(c.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var r=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=l(r),m=a,h=p["".concat(c,".").concat(m)]||p[m]||d[m]||o;return r?n.createElement(h,s(s({ref:t},u),{},{components:r})):n.createElement(h,s({ref:t},u))}));function m(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,s=new Array(o);s[0]=p;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var l=2;l=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var c=n.createContext({}),l=function(e){var t=n.useContext(c),a=t;return e&&(a="function"==typeof e?e(t):s(s({},t),e)),a},u=function(e){var t=l(e.components);return n.createElement(c.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,c=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=l(a),m=r,h=p["".concat(c,".").concat(m)]||p[m]||d[m]||o;return a?n.createElement(h,s(s({ref:t},u),{},{components:a})):n.createElement(h,s({ref:t},u))}));function m(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,s=new Array(o);s[0]=p;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i.mdxType="string"==typeof e?e:r,s[1]=i;for(var l=2;l=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var c=n.createContext({}),l=function(e){var t=n.useContext(c),r=t;return e&&(r="function"==typeof e?e(t):s(s({},t),e)),r},u=function(e){var t=l(e.components);return n.createElement(c.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var 
r=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=l(r),m=a,h=p["".concat(c,".").concat(m)]||p[m]||d[m]||o;return r?n.createElement(h,s(s({ref:t},u),{},{components:r})):n.createElement(h,s({ref:t},u))}));function m(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,s=new Array(o);s[0]=p;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var l=2;l=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var c=n.createContext({}),l=function(e){var t=n.useContext(c),a=t;return e&&(a="function"==typeof e?e(t):s(s({},t),e)),a},u=function(e){var t=l(e.components);return n.createElement(c.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,c=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=l(a),m=r,h=p["".concat(c,".").concat(m)]||p[m]||d[m]||o;return a?n.createElement(h,s(s({ref:t},u),{},{components:a})):n.createElement(h,s({ref:t},u))}));function m(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,s=new Array(o);s[0]=p;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i.mdxType="string"==typeof e?e:r,s[1]=i;for(var l=2;l **Migrate**.\\n1. Choose the node to which you want to migrate the virtual machine and select **Apply**.\\n\\nAfter successfully selecting **Apply**, a CRD `VirtualMachineInstanceMigration` object is created, and the related `controller/operator` will start the process.\\n\\n### Migration CRD Object\\n\\nYou can also create the CRD `VirtualMachineInstanceMigration` object manually via `kubectl` or other tools.\\n\\nThe example below starts a migration process for a virtual machine instance (VMI) `new-vm`.\\n\\n```\\napiVersion: kubevirt.io/v1\\nkind: VirtualMachineInstanceMigration\\nmetadata:\\n name: migration-job\\nspec:\\n vmiName: new-vm\\n```\\n\\nUnder the hood, the open source projects `Kubevirt, Libvirt, QEMU, ... ` perform most of the `VM Live Migration`. [References.](#references)\\n\\n### Migration Status Reporting\\n\\nWhen starting a virtual machine instance (VMI), it has also been calculated whether the machine is live migratable. The result is being stored in the VMI `VMI.status.conditions`. The calculation can be based on multiple parameters of the VMI, however, at the moment, the calculation is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.\\n\\nThe reported Migration Method is also being calculated during VMI start. `BlockMigration` indicates that some of the VMI disks require copying from the source to the destination. `LiveMigration` means that only the instance memory will be copied.\\n\\n```\\nStatus:\\n Conditions:\\n Status: True\\n Type: LiveMigratable\\n Migration Method: BlockMigration\\n```\\n\\n### Migration Status\\n\\nThe migration progress status is reported in `VMI.status`. 
Most importantly, it indicates whether the migration has been completed or failed.\\n\\nBelow is an example of a successful migration.\\n\\n```\\nMigration State:\\n Completed: true\\n End Timestamp: 2019-03-29T03:37:52Z\\n Migration Config:\\n Completion Timeout Per GiB: 800\\n Progress Timeout: 150\\n Migration UID: c64d4898-51d3-11e9-b370-525500d15501\\n Source Node: node02\\n Start Timestamp: 2019-03-29T04:02:47Z\\n Target Direct Migration Node Ports:\\n 35001: 0\\n 41068: 49152\\n 38284: 49153\\n Target Node: node01\\n Target Node Address: 10.128.0.46\\n Target Node Domain Detected: true\\n Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq\\n```\\n\\n## VM Live Migration Strategies\\n\\nVM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.\\n\\n### Understanding Different VM Live Migration Strategies\\n\\nVM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the scenario, however, the migration might get stuck without an ability to progress.\\n\\nThe main factor that affects migrations from the guest perspective is its dirty rate, which is the rate by which the VM dirties memory. Guests with high dirty rate lead to a race during migration. On the one hand, memory would be transferred continuously to the target, and on the other, the same memory would get dirty by the guest. On such scenarios, one could consider to use more advanced migration strategies. Refer to [Understanding different migration strategies](https://kubevirt.io/user-guide/operations/live_migration/#understanding-different-migration-strategies) for more details.\\n\\nThere are 3 `VM Live Migration` strategies/policies:\\n\\n#### VM Live Migration Strategy: Pre-copy\\n\\nPre-copy is the default strategy. It should be used for most cases.\\n\\nThe way it works is as following:\\n1. The target VM is created, but the guest keeps running on the source VM.\\n1. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.\\n1. The guest starts executing on the target VM. 4. The source VM is being removed.\\n\\nPre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.\\n\\nHowever, on some cases migrations might not converge easily, that is, by the time the chunk of source VM state would be received by the target VM, it would already be mutated by the source VM (which is the VM the guest executes on). There are many reasons for migrations to fail converging, such as a high dirty-rate or low resources like network bandwidth and CPU. On such scenarios, see the following alternative strategies below.\\n\\n#### VM Live Migration Strategy: Post-copy\\n\\nThe way post-copy migrations work is as following:\\n1. The target VM is created.\\n1. The guest is being run on the target VM.\\n1. The source starts sending chunks of VM state (mostly memory) to the target.\\n1. When the guest, running on the target VM, would access memory: 1. If the memory exists on the target VM, the guest can access it. 2. 
Otherwise, the target VM asks for a chunk of memory from the source VM.\\n1. Once all of the memory state is updated at the target VM, the source VM is being removed.\\n\\nThe main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:\\n\\n**Advantages:**\\n\\n- The same memory chink is never being transferred twice. This is possible due to the fact that with post-copy it doesn\'t matter that a page had been dirtied since the guest is already running on the target VM.\\n- This means that a high dirty-rate has much less effect.\\n- Consumes less network bandwidth.\\n\\n**Disadvantages:**\\n\\n- When using post-copy, the VM state has no one source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest\'s state, but some other parts of it may still be updated only at the source VM. This situation is generally dangerous, since, for example, if either the target or guest VMs crash the state cannot be recovered.\\n- Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest would have to wait for a lot of memory in a short period of time.\\n- Slower than pre-copy on most cases.\\n- Harder to cancel a migration.\\n\\n#### VM Live Migration Strategy: Auto-converge\\n\\nAuto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.\\n\\nSince a high dirty-rate is usually the most significant factor for migrations to not converge, auto-converge simply throttles the guest\'s CPU. If the migration would converge fast enough, the guest\'s CPU would not be throttled or throttled negligibly. But, if the migration would not converge fast enough, the CPU would be throttled more and more as time goes.\\n\\nThis technique dramatically increases the probability of the migration converging eventually.\\n\\n### Observe the VM Live Migration Progress and Result\\n\\n#### Migration Timeouts\\n\\nDepending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process non-locked pages and blocks are being copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than these are being copied.\\n\\n#### Completion Time\\n\\nIn some cases the virtual machine can write to different memory pages / disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, live migration will be aborted if it is running for a long period of time. The timeout is calculated base on the size of the VMI, it\'s memory and the ephemeral disks that are needed to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s is the time for GiB of data to wait for the migration to be completed before aborting it. A VMI with 8Gib of memory will time out after 6400 seconds.\\n\\n#### Progress Timeout\\n\\nA VM Live Migration will also be aborted when it notices that copying memory doesn\'t make any progress. 
The time to wait for live migration to make progress in transferring data is configurable by the `progressTimeout` parameter, which defaults to 150 seconds.\\n\\n## VM Live Migration Configurations\\n\\n### Changing Cluster Wide Migration Limits\\n\\nKubeVirt puts some limits in place so that migrations don\'t overwhelm the cluster. By default, it is to only run 5 migrations in parallel with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.\\n\\nYou can change these values in the `kubevirt` CR:\\n```\\n apiVersion: kubevirt.io/v1\\n kind: Kubevirt\\n metadata:\\n name: kubevirt\\n namespace: kubevirt\\n spec:\\n configuration:\\n migrations:\\n parallelMigrationsPerCluster: 5\\n parallelOutboundMigrationsPerNode: 2\\n bandwidthPerMigration: 64Mi\\n completionTimeoutPerGiB: 800\\n progressTimeout: 150\\n disableTLS: false\\n nodeDrainTaintKey: \\"kubevirt.io/drain\\"\\n allowAutoConverge: false ---------------------\x3e related to: Auto-converge\\n allowPostCopy: false -------------------------\x3e related to: Post-copy\\n unsafeMigrationOverride: false\\n```\\n\\nRemember that most of these configurations can be overridden and fine-tuned to a specified group of VMs. For more information, please refer to the Migration Policies section below.\\n\\n### Migration Policies\\n\\n[Migration policies](https://kubevirt.io/user-guide/operations/migration_policies/) provides a new way of applying migration configurations to Virtual Machines. The policies can refine Kubevirt CR\'s `MigrationConfiguration` that sets the cluster-wide migration configurations. This way, the cluster-wide settings default how the migration policy can be refined (i.e., changed, removed, or added).\\n\\nRemember that migration policies are in version `v1alpha1`. This means that this API is not fully stable yet and that APIs may change in the future.\\n\\n#### Migration Configurations\\n\\nCurrently, the `MigrationPolicy` spec only includes the following configurations from Kubevirt CR\'s `MigrationConfiguration`. (In the future, more configurations that aren\'t part of Kubevirt CR will be added):\\n\\n```\\napiVersion: migrations.kubevirt.io/v1alpha1\\nkind: MigrationPolicy\\n spec:\\n allowAutoConverge: true\\n bandwidthPerMigration: 217Ki\\n completionTimeoutPerGiB: 23\\n allowPostCopy: false\\n```\\n\\nAll the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR\'s `MigrationConfiguration`. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any `MigrationPolicy` and VMs that are bound to a `MigrationPolicy` that does not define all fields of the configurations.\\n\\n##### Matching Policies to VMs\\n\\nNext in the spec are the selectors defining the group of VMs to apply the policy. 
The options to do so are the following.\\n\\nThis policy applies to the VMs in namespaces that have all the required labels:\\n\\n```\\napiVersion: migrations.kubevirt.io/v1alpha1\\nkind: MigrationPolicy\\n spec:\\n selectors:\\n namespaceSelector:\\n hpc-workloads: true # Matches a key and a value\\n```\\n\\nThe policy below applies to the VMs that have all the required labels:\\n\\n```\\napiVersion: migrations.kubevirt.io/v1alpha1\\nkind: MigrationPolicy\\n spec:\\n selectors:\\n virtualMachineInstanceSelector:\\n workload-type: db # Matches a key and a value\\n```\\n\\n## References\\n\\n### Documents\\n\\n### Libvirt Guest Migration\\n\\n`Libvirt` has a chapter to describe the pricipal of `VM/Guest Live Migration`.\\n\\nhttps://libvirt.org/migration.html\\n\\n### Kubevirt Live Migration\\n\\nhttps://kubevirt.io/user-guide/operations/live_migration/\\n\\n### Source Code\\n\\nThe `VM Live Migration` related configuration options are passed to each layer correspondingly.\\n\\n#### Kubevirt\\n\\nhttps://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103\\n\\n```\\n...\\nimport \\"libvirt.org/go/libvirt\\"\\n\\n...\\n\\nfunc generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {\\n...\\n\\tif options.AllowAutoConverge {\\n\\t\\tmigrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE\\n\\t}\\n\\tif options.AllowPostCopy {\\n\\t\\tmigrateFlags |= libvirt.MIGRATE_POSTCOPY\\n\\t}\\n...\\n}\\n```\\n\\n#### Go Package Libvirt\\n\\nhttps://pkg.go.dev/libvirt.org/go/libvirt\\n\\n```\\nconst (\\n...\\n\\tMIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)\\n\\tMIGRATE_RDMA_PIN_ALL = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)\\n\\tMIGRATE_POSTCOPY = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)\\n...\\n)\\n```\\n\\n#### Libvirt\\n\\nhttps://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030\\n\\n```\\n /* Enable algorithms that ensure a live migration will eventually converge.\\n * This usually means the domain will be slowed down to make sure it does\\n * not change its memory faster than a hypervisor can transfer the changed\\n * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*\\n * parameters can be used to tune the algorithm.\\n *\\n * Since: 1.2.3\\n */\\n VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),\\n...\\n /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy\\n * migration. However, the migration will start normally and\\n * virDomainMigrateStartPostCopy needs to be called to switch it into the\\n * post-copy mode. 
See virDomainMigrateStartPostCopy for more details.\\n *\\n * Since: 1.3.3\\n */\\n VIR_MIGRATE_POSTCOPY = (1 << 15),\\n```"},{"id":"use_rook_ceph_external_storage","metadata":{"permalink":"/kb/use_rook_ceph_external_storage","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-08-23/using_rook_ceph_storage.md","source":"@site/kb/2023-08-23/using_rook_ceph_storage.md","title":"Use Rook Ceph External Storage with Harvester","description":"Use Rook Ceph External Storage with Harvester","date":"2023-08-23T00:00:00.000Z","formattedDate":"August 23, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"rook","permalink":"/kb/tags/rook"},{"label":"ceph","permalink":"/kb/tags/ceph"},{"label":"csi","permalink":"/kb/tags/csi"}],"readingTime":3.86,"truncated":false,"authors":[{"name":"Hang Yu","title":"Staff Software Engineer","url":"https://github.com/futuretea","image_url":"https://github.com/futuretea.png","imageURL":"https://github.com/futuretea.png"}],"frontMatter":{"title":"Use Rook Ceph External Storage with Harvester","description":"Use Rook Ceph External Storage with Harvester","slug":"use_rook_ceph_external_storage","authors":[{"name":"Hang Yu","title":"Staff Software Engineer","url":"https://github.com/futuretea","image_url":"https://github.com/futuretea.png","imageURL":"https://github.com/futuretea.png"}],"tags":["harvester","rook","ceph","csi"],"hide_table_of_contents":false},"prevItem":{"title":"VM Live Migration Policy and Configuration","permalink":"/kb/vm_live_migration_policy_and_configuration"},"nextItem":{"title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","permalink":"/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility"}},"content":"Starting with Harvester v1.2.0, it offers the capability to install a Container Storage Interface (CSI) in your Harvester cluster. This allows you to leverage external storage for the Virtual Machine\'s non-system data disk, giving you the flexibility to use different drivers tailored for specific needs, whether it\'s for performance optimization or seamless integration with your existing in-house storage solutions.\\n\\nIt\'s important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.\\n\\nOne of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.\\n\\n[Ceph](https://ceph.io) is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.\\n\\n[Rook](https://rook.io) simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. 
It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.\\n\\nIn this article, we will walk you through the process of installing, configuring, and utilizing [Rook](https://rook.io/docs/rook/v1.12/Getting-Started/intro/) to use storage from an [existing external Ceph cluster](https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/) as a data disk for a VM within the Harvester environment.\\n\\n## Install Harvester Cluster\\n\\nHarvester\'s operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph\'s requirements, you need to add specific persistent paths to the `os.persistentStatePaths` section in the [Harvester configuration](https://docs.harvesterhci.io/dev/install/harvester-configuration#ospersistent_state_paths). These paths include:\\n\\n```yaml\\nos:\\n persistent_state_paths:\\n - /var/lib/rook\\n - /var/lib/ceph\\n modules:\\n - rbd\\n - nbd\\n```\\n\\nAfter the cluster is installed, refer to [How can I access the kubeconfig file of the Harvester cluster?](https://docs.harvesterhci.io/v1.1/faq#how-can-i-access-the-kubeconfig-file-of-the-harvester-cluster) to get the kubeconfig of the Harvester cluster.\\n\\n## Install Rook to Harvester\\n\\nInstall Rook to the Harvester cluster by referring to [Rook Quickstart](https://rook.io/docs/rook/v1.12/Getting-Started/quickstart/).\\n\\n```bash\\ncurl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \\\\\\n && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples\\n# apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/\\nkubectl apply -f crds.yaml -f common.yaml -f operator.yaml\\nkubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m\\n```\\n\\n## Using an existing external Ceph cluster\\n\\n1. Run the python script `create-external-cluster-resources.py` in the [existing external Ceph cluster](https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/) for creating all users and keys.\\n```bash\\n# script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys\\ncurl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py\\npython3 create-external-cluster-resources.py --rbd-data-pool-name --namespace rook-ceph-external --format bash\\n```\\n\\n2. Copy the Bash output.\\n\\nExample output:\\n```\\nexport NAMESPACE=rook-ceph-external\\nexport ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805\\nexport ROOK_EXTERNAL_USERNAME=client.healthchecker\\nexport ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789\\nexport ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==\\nexport ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/\\nexport CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==\\nexport CSI_RBD_NODE_SECRET_NAME=csi-rbd-node\\nexport CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==\\nexport CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner\\nexport MONITORING_ENDPOINT=192.168.5.99\\nexport MONITORING_ENDPOINT_PORT=9283\\nexport RBD_POOL_NAME=test\\nexport RGW_POOL_PREFIX=default\\n```\\n\\n3. 
Consume the external Ceph cluster resources on the Harvester cluster.\\n\\n```bash\\n# Paste the above output from create-external-cluster-resources.py into import-env.sh\\nvim import-env.sh\\nsource import-env.sh\\n# this script will create a StorageClass ceph-rbd\\nsource import-external-cluster.sh\\n```\\n\\n```bash\\nkubectl apply -f common-external.yaml\\nkubectl apply -f cluster-external.yaml\\n# wait for all pods to become Ready\\nwatch \'kubectl --namespace rook-ceph get pods\'\\n```\\n\\n4. Create the VolumeSnapshotClass `csi-rbdplugin-snapclass-external`.\\n\\n```bash\\ncat >./csi/rbd/snapshotclass-external.yaml < **Settings**.\\n1. Find and select **csi-driver-config**, and then click on the **\u22ee** > **Edit Setting** to access the configuration options.\\n1. In the settings, set the **Provisioner** to `rook-ceph.rbd.csi.ceph.com`.\\n2. Next, specify the **Volume Snapshot Class Name** as `csi-rbdplugin-snapclass-external`. This setting points to the name of the `VolumeSnapshotClass` used for creating volume snapshots or VM snapshots.\\n3. Similarly, set the **Backup Volume Snapshot Class Name** to `csi-rbdplugin-snapclass-external`. This corresponds to the name of the `VolumeSnapshotClass` responsible for creating VM backups.\\n\\n![csi-driver-config-external](./imgs/csi-driver-config-external.png)\\n\\n## Use Rook Ceph in Harvester\\n\\nAfter successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named `rook-ceph-block` for the internal Ceph cluster or named `ceph-rbd` for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster\'s storage capabilities.\\n\\nWith these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.\\n\\n![rook-ceph-volume-external](./imgs/rook-ceph-volume-external.png)\\n\\n![rook-ceph-vm-external](./imgs/rook-ceph-vm-external.png)"},{"id":"upgrading_guest_clusters_with_harvester_ip_pool_compatibility","metadata":{"permalink":"/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-08-21/compatible_with_ip_pool_new_feature.md","source":"@site/kb/2023-08-21/compatible_with_ip_pool_new_feature.md","title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","description":"Explain how to keep load balancer IP during upgrading guest cluster","date":"2023-08-21T00:00:00.000Z","formattedDate":"August 21, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"load balancer","permalink":"/kb/tags/load-balancer"},{"label":"cloud provider","permalink":"/kb/tags/cloud-provider"},{"label":"ip pool","permalink":"/kb/tags/ip-pool"},{"label":"upgrade","permalink":"/kb/tags/upgrade"}],"readingTime":2.675,"truncated":false,"authors":[{"name":"Canwu Yao","title":"Software Engineer","url":"https://github.com/yaocw2020","image_url":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4","imageURL":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4"}],"frontMatter":{"title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","description":"Explain how to keep load balancer IP during upgrading guest cluster","slug":"upgrading_guest_clusters_with_harvester_ip_pool_compatibility","authors":[{"name":"Canwu Yao","title":"Software 
Engineer","url":"https://github.com/yaocw2020","image_url":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4","imageURL":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4"}],"tags":["harvester","load balancer","cloud provider","ip pool","upgrade"],"hide_table_of_contents":false},"prevItem":{"title":"Use Rook Ceph External Storage with Harvester","permalink":"/kb/use_rook_ceph_external_storage"},"nextItem":{"title":"Using NetApp Storage on Harvester","permalink":"/kb/install_netapp_trident_csi"}},"content":"As **Harvester v1.2.0** is released, a new Harvester cloud provider version **0.2.2** is integrated into RKE2 **v1.24.15+rke2r1**, **v1.25.11+rke2r1**, **v1.26.6+rke2r1**, **v1.27.3+rke2r1**, and newer versions.\\n\\nWith Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:\\n\\n![image](ippoolforcluster2.png)\\n\\nHowever, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to [issue 4232](https://github.com/harvester/harvester/issues/4232) for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs. \\n\\nThis article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.\\n\\n## Prerequisites\\n\\n- Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to [Access Harvester Cluster](https://docs.harvesterhci.io/v1.1/faq#how-can-i-access-the-kubeconfig-file-of-the-harvester-cluster) to get the desired one.\\n\\n- Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to [Accessing Clusters with kubectl from Your Workstation](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/manage-clusters/access-clusters/use-kubectl-and-kubeconfig#accessing-clusters-with-kubectl-from-your-workstation) for instructions on how to download the kubeconfig file.\\n\\n## Steps to Keep Load Balancer IP\\n\\n1. Execute the following script before upgrading.\\n ```\\n curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade \\n ```\\n\\n - ``: Path to the Harvester kubeconfig file.\\n - ``: Path to the kubeconfig file of your guest Kubernetes cluster.\\n - ``: Name of your guest cluster.\\n - ``: Namespace where the VMs of the guest cluster are located.\\n\\n The script will help users copy the DHCP information to the service annotation and modify the IP pool allocated history to make sure the IP is unchanged.\\n\\n ![image](before-upgrade.png)\\n\\n After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. 
For example:\\n\\n ``` yaml\\n apiVersion: v1\\n kind: Service\\n metadata:\\n annotations:\\n kube-vip.io/hwaddr: 00:00:6c:4f:18:68\\n kube-vip.io/requestedIP: 172.19.105.215\\n name: lb0\\n namespace: default\\n ```\\n\\n As for the load balancer service with pool mode, the IP pool allocated history will be modified as the new load balancer name. For example:\\n\\n ``` yaml\\n apiVersion: loadbalancer.harvesterhci.io/v1beta1\\n kind: IPPool\\n metadata:\\n name: default\\n spec:\\n ...\\n status:\\n allocatedHistory:\\n 192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replace the new load balancer name\\n ```\\n\\n2. Add network selector for the pool.\\n\\n For example, the following cluster is under the VM network `default/mgmt-untagged`. The network selector should be `default/mgmt-untagged`.\\n\\n ![image](network.png)\\n\\n ![image](network-selector.png)\\n\\n3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.\\n \\n ![image](upgrade.png)\\n\\n1. Execute the script after upgrading.\\n ```\\n curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade \\n ```\\n ![image](before-upgrade.png)\\n \\n In this step, the script wraps the operations to upgrade the Harvester cloud provider to set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers will be created with the unchanged IPs."},{"id":"install_netapp_trident_csi","metadata":{"permalink":"/kb/install_netapp_trident_csi","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-08-11/using_netapp_third_party_storage.md","source":"@site/kb/2023-08-11/using_netapp_third_party_storage.md","title":"Using NetApp Storage on Harvester","description":"Installation procedure for NetApp Astra Trident CSI Driver","date":"2023-08-11T00:00:00.000Z","formattedDate":"August 11, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"}],"readingTime":6.08,"truncated":false,"authors":[{"name":"Jeff Radick","title":"Staff Software Engineer"}],"frontMatter":{"title":"Using NetApp Storage on Harvester","description":"Installation procedure for NetApp Astra Trident CSI Driver","slug":"install_netapp_trident_csi","authors":[{"name":"Jeff Radick","title":"Staff Software Engineer"}],"tags":["harvester"],"hide_table_of_contents":false},"prevItem":{"title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","permalink":"/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility"},"nextItem":{"title":"Configure PriorityClass on Longhorn System Components","permalink":"/kb/configure_priority_class_longhorn"}},"content":"This article covers instructions for installing the Netapp Astra Trident CSI driver into a Harvester cluster, which enables NetApp storage systems to store storage volumes usable by virtual machines running in Harvester.\\n\\nThe NetApp storage will be an option in addition to the normal Longhorn storage; it will not replace Longhorn. 
Virtual machine images will still be stored using Longhorn.\\n\\nThis has been tested with Harvester 1.2.0 and Trident v23.07.0.\\n\\nThis procedure only works to access storage via iSCSI, not NFS.\\n\\n:::note\\n3rd party storage classes (including those based on Trident) can only be used for non-boot volumes of Harvester VMs.\\n:::\\n\\n# Detailed Instructions\\n\\nWe assume that before beginning this procedure, a Harvester cluster and a NetApp ONTAP storage system are both installed and configured for use.\\n\\nMost of these steps can be performed on any system with the `helm` and `kubectl` commands installed and network connectivity to the management port of the Harvester cluster. Let\'s call this your workstation. Certain steps must be performed on one or more cluster nodes themselves. The steps described below should be done on your workstation unless otherwise indicated.\\n\\nThe last step (enabling multipathd) should be done on all nodes after the Trident CSI has been installed.\\n\\nCertain parameters of your installation will require modification of details in the examples in the procedure given below. Those which you may wish to modify include:\\n\\n* The namespace. `trident` is used as the namespace in the examples, but you may prefer to use another.\\n* The name of the deployment. `mytrident` is used but you can change this to something else.\\n* The management IP address of the ONTAP storage system\\n* Login credentials (username and password) of the ONTAP storage system\\n\\nThe procedure is as follows.\\n\\n1. Read the NetApp Astra Trident documentation:\\n\\n * https://docs.netapp.com/us-en/trident/\\n * https://docs.netapp.com/us-en/trident/trident-get-started/kubernetes-deploy-operator.html\\n * https://docs.netapp.com/us-en/trident/trident-get-started/kubernetes-deploy-helm.html#deploy-the-trident-operator-and-install-astra-trident-using-helm\\n\\n The simplest method is to install using Helm; that process is described here.\\n\\n1. Download the KubeConfig from the Harvester cluster.\\n\\n * Open the web UI for your Harvester cluster\\n * In the lower left corner, click the \\"Support\\" link. This will take you to a \\"Harvester Support\\" page.\\n * Click the button labeled \\"Download KubeConfig\\". This will download a your cluster config in a file called \\"local.yaml\\" by default.\\n * Move this file to a convenient location and set your `KUBECONFIG` environment variable to the path of this file.\\n\\n1. Prepare the cluster for installation of the Helm chart.\\n\\n Before starting installation of the helm chart, special authorization must be provided to enable certain modifications to be made during the installation.\\n This addresses the issue described here: https://github.com/NetApp/trident/issues/839\\n\\n * Put the following text into a file. For this example we\'ll call it `authorize_trident.yaml`.\\n\\n ```yaml\\n ---\\n apiVersion: rbac.authorization.k8s.io/v1\\n kind: ClusterRole\\n metadata:\\n name: trident-operator-psa\\n rules:\\n - apiGroups:\\n - management.cattle.io\\n resources:\\n - projects\\n verbs:\\n - updatepsa\\n ---\\n apiVersion: rbac.authorization.k8s.io/v1\\n kind: ClusterRoleBinding\\n metadata:\\n name: trident-operator-psa\\n roleRef:\\n apiGroup: rbac.authorization.k8s.io\\n kind: ClusterRole\\n name: trident-operator-psa\\n subjects:\\n - kind: ServiceAccount\\n name: trident-operator\\n namespace: trident\\n ```\\n\\n * Apply this manifest via the command `kubectl apply -f authorize_trident.yaml`.\\n\\n1. 
Install the helm chart.\\n\\n * First you will need to add the Astra Trident Helm repository:\\n\\n ```shell\\n helm repo add netapp-trident https://netapp.github.io/trident-helm-chart\\n ```\\n\\n * Next, install the Helm chart. This example uses `mytrident` as the deployment name, `trident` as the namespace, and 23.07.0 as the version number to install:\\n\\n ```shell\\n helm install mytrident netapp-trident/trident-operator --version 23.07.0 --create-namespace --namespace trident\\n ```\\n\\n * The NetApp documentation describes variations on how you can do this.\\n\\n1. Download and extract the tridentctl command, which will be needed for the next few steps.\\n\\n This and the next few steps need to be performed logged into a master node of the Harvester cluster, using root access.\\n\\n ```shell\\n cd /tmp\\n curl -L -o trident-installer-23.07.0.tar.gz https://github.com/NetApp/trident/releases/download/v23.07.0/trident-installer-23.07.0.tar.gz\\n tar -xf trident-installer-23.07.0.tar.gz\\n cd trident-installer\\n ```\\n\\n1. Install a backend.\\n\\n This part is specific to Harvester.\\n\\n 1. Put the following into a text file, for example /tmp/backend.yaml\\n\\n ```yaml\\n version: 1\\n backendName: default_backend_san\\n storageDriverName: ontap-san-economy\\n managementLIF: 172.19.97.114\\n svm: default_backend\\n username: admin\\n password: password1234\\n labels:\\n name: default_backend_san\\n ```\\n\\n The LIF IP address, username, and password of this file\\n should be replaced with the management LIF and credentials\\n for the ONTAP system.\\n\\n 1. Create the backend\\n\\n ```shell\\n ./tridentctl create backend -f /tmp/backend.yaml -n trident\\n ```\\n\\n 1. Check that it is created\\n\\n ```shell\\n ./tridentctl get backend -n trident\\n ```\\n\\n1. Define a StorageClass and SnapshotClass.\\n\\n 1. Put the following into a file, for example `/tmp/storage.yaml`\\n\\n ```yaml\\n ---\\n apiVersion: storage.k8s.io/v1\\n kind: StorageClass\\n metadata:\\n name: ontap-san-economy\\n provisioner: csi.trident.netapp.io\\n parameters:\\n selector: \\"name=default_backend_san\\"\\n ---\\n apiVersion: snapshot.storage.k8s.io/v1\\n kind: VolumeSnapshotClass\\n metadata:\\n name: csi-snapclass\\n driver: csi.trident.netapp.io\\n deletionPolicy: Delete\\n ```\\n\\n 1. Apply the definitions:\\n\\n ```shell\\n kubectl apply -f /tmp/storage.yaml\\n ```\\n\\n1. Enable multipathd\\n\\n The following is required to enable multipathd.\\n This must be done on every node of the Harvester cluster, using root access.\\n The preceding steps should only be done once on a single node.\\n\\n 1. Create this file in `/oem/99_multipathd.yaml`:\\n\\n ```yaml\\n stages:\\n default:\\n - name: \\"Setup multipathd\\"\\n systemctl:\\n enable:\\n - multipathd\\n start:\\n - multipathd\\n ```\\n\\n 1. Configure `multipathd` to exclude pathnames used by Longhorn.\\n\\n This part is a little tricky. `multipathd` will automatically discover\\n device names matching a certain pattern, and attempt to set up multipathing on them.\\n Unfortunately, Longhorn\'s device names follow the same pattern, and\\n will not work correctly if `multipathd` tries to use those devices.\\n\\n Therefore the file `/etc/multipath.conf` must be set up on each node\\n so as to prevent `multipathd` from touching any of the devices\\n that Longhorn will use. 
Unfortunately, it is not possible to know\\n in advance which device names will be used until the volumes are attached\\n to a VM when the VM is started, or when the volumes are hot-added to a running VM.\\n The recommended method is to \\"whitelist\\" the Trident devices using device\\n properties rather than device naming. The properties to allow are the\\n device vendor and product. Here is an example of what you\'ll want in `/etc/multipath.conf`:\\n\\n ```text\\n blacklist {\\n device {\\n vendor \\"!NETAPP\\"\\n product \\"!LUN\\"\\n }\\n }\\n blacklist_exceptions {\\n device {\\n vendor \\"NETAPP\\"\\n product \\"LUN\\"\\n }\\n }\\n ```\\n\\n This example only works if NetApp is the only storage provider in the system for which `multipathd` must be used. More complex environments will require more complex configuration.\\n\\n Explicitly putting that content into `/etc/multipath.conf` will work when you start `multipathd` as described below, but the change in `/etc` will not persist across node reboots. To solve that problem, you should add another file to `/oem` that will re-generate `/etc/multipath.conf` when the node reboots. The following example will create the `/etc/multipath.conf` given in the example above, but may need to be modified for your environment if you have a more complex iSCSI configuration:\\n\\n ```text\\n stages:\\n initramfs:\\n - name: \\"Configure multipath blacklist and whitelist\\"\\n files:\\n - path: /etc/multipath.conf\\n permissions: 0644\\n owner: 0\\n group: 0\\n content: |\\n blacklist {\\n device {\\n vendor \\"!NETAPP\\"\\n product \\"!LUN\\"\\n }\\n }\\n blacklist_exceptions {\\n device {\\n vendor \\"NETAPP\\"\\n product \\"LUN\\"\\n }\\n }\\n ```\\n\\n Remember, this has to be done on every node.\\n\\n 1. 
Enable multipathd.\\n\\n Adding the above files to `/oem` will take effect on the next reboot of the node; `multipathd` can be enabled immediately without rebooting the node using the following commands:\\n\\n ```shell\\n systemctl enable multipathd\\n systemctl start multipathd\\n ```\\n\\n After the above steps, the `ontap-san-economy` storage class should be available when creating a volume for a Harvester VM."},{"id":"configure_priority_class_longhorn","metadata":{"permalink":"/kb/configure_priority_class_longhorn","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-07-25/configure_priority_class_longhorn.md","source":"@site/kb/2023-07-25/configure_priority_class_longhorn.md","title":"Configure PriorityClass on Longhorn System Components","description":"Configure priority classes on Longhorn system components","date":"2023-07-25T00:00:00.000Z","formattedDate":"July 25, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"longhorn","permalink":"/kb/tags/longhorn"},{"label":"priority class","permalink":"/kb/tags/priority-class"}],"readingTime":6.405,"truncated":false,"authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"frontMatter":{"title":"Configure PriorityClass on Longhorn System Components","description":"Configure priority classes on Longhorn system components","slug":"configure_priority_class_longhorn","authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"tags":["harvester","longhorn","priority class"],"hide_table_of_contents":false},"prevItem":{"title":"Using NetApp Storage on Harvester","permalink":"/kb/install_netapp_trident_csi"},"nextItem":{"title":"Package your own Toolbox Image","permalink":"/kb/package_your_own_toolbox_image"}},"content":"**Harvester v1.2.0** introduces a new enhancement where Longhorn system-managed components in newly-deployed clusters are automatically assigned a `system-cluster-critical` priority class by default. However, when upgrading your Harvester clusters from previous versions, you may notice that Longhorn system-managed components do not have any priority class set.\\n\\nThis behavior is intentional and aimed at supporting zero-downtime upgrades. Longhorn does not allow changing the `priority-class` setting when attached volumes exist. For more details, please refer to [Setting Priority Class During Longhorn Installation](https://longhorn.io/docs/1.4.3/advanced-resources/deploy/priority-class/#setting-priority-class-during-longhorn-installation)).\\n\\nThis article explains how to manually configure priority classes for Longhorn system-managed components after upgrading your Harvester cluster, ensuring that your Longhorn components have the appropriate priority class assigned and maintaining the stability and performance of your system.\\n\\n## Stop all virtual machines\\n\\nStop all virtual machines (VMs) to detach all volumes. Please back up any work before doing this.\\n1. [Login to a Harvester controller node and become root](https://docs.harvesterhci.io/v1.1/troubleshooting/os#how-to-log-into-a-harvester-node).\\n2. 
Get all running VMs and write down their namespaces and names:\\n\\n ```bash\\n kubectl get vmi -A\\n ```\\n\\n Alternatively, you can get this information by backing up the Virtual Machine Instance (VMI) manifests with the following command:\\n ```bash\\n kubectl get vmi -A -o json > vmi-backup.json\\n ```\\n\\n3. Shut down all VMs. Log in to all running VMs and shut them down gracefully (recommended). Or use the following command to send shutdown signals to all VMs:\\n ```bash\\n kubectl get vmi -A -o json | jq -r \'.items[] | [.metadata.name, .metadata.namespace] | @tsv\' | while IFS=$\'\\\\t\' read -r name namespace; do\\n if [ -z \\"$name\\" ]; then\\n break\\n fi\\n echo \\"Stop ${namespace}/${name}\\"\\n virtctl stop $name -n $namespace\\n done\\n ```\\n\\n :::note\\n You can also stop all VMs from the Harvester UI:\\n 1. Go to the **Virtual Machines** page.\\n 2. For each VM, select **\u22ee** > **Stop**.\\n :::\\n\\n4. Ensure there are no running VMs:\\n\\n Run the command:\\n\\n ```bash\\n kubectl get vmi -A\\n ```\\n\\n The above command must return:\\n\\n ```bash\\n No resources found\\n\\n## Scale down monitoring pods\\n\\n1. Scale down the Prometheus deployment. Run the following command and wait for all Prometheus pods to terminate:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch \'{\\"spec\\": {\\"replicas\\": 0}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched\\n statefulset rolling update complete 0 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...\\n ```\\n\\n2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to terminate:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch \'{\\"spec\\": {\\"replicas\\": 0}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched\\n statefulset rolling update complete 0 pods at revision alertmanager-rancher-monitoring-alertmanager-c8c459dff...\\n ```\\n\\n3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to terminate:\\n\\n ```bash\\n kubectl scale --replicas=0 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/rancher-monitoring-grafana scaled\\n deployment \\"rancher-monitoring-grafana\\" successfully rolled out\\n ```\\n\\n## Scale down vm-import-controller pods\\n\\n1. Check if the [`vm-import-controller`](https://docs.harvesterhci.io/v1.1/advanced/vmimport) addon is enabled and configured with a persistent volume with the following command:\\n\\n ```bash\\n kubectl get pvc -n harvester-system harvester-vm-import-controller\\n ```\\n\\n If the above command returns an output like this, you must scale down the `vm-import-controller` pod. 
Otherwise, you can skip the following step.\\n ```\\n NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE\\n harvester-vm-import-controller Bound pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559 200Gi RWO harvester-longhorn 2m53s\\n ```\\n\\n2. Scale down the `vm-import-controller` pods with the following command:\\n\\n ```bash\\n kubectl scale --replicas=0 deployment/harvester-vm-import-controller -n harvester-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/harvester-vm-import-controller scaled\\n deployment \\"harvester-vm-import-controller\\" successfully rolled out\\n ```\\n\\n## Set the `priority-class` setting\\n\\n1. Before applying the `priority-class` setting, you need to verify all volumes are detached. Run the following command to verify the `STATE` of each volume is `detached`:\\n\\n ```bash\\n kubectl get volumes.longhorn.io -A\\n ```\\n\\n Verify the output looks like this:\\n ```\\n NAMESPACE NAME STATE ROBUSTNESS SCHEDULED SIZE NODE AGE\\n longhorn-system pvc-5743fd02-17a3-4403-b0d3-0e9b401cceed detached unknown 5368709120 15d\\n longhorn-system pvc-7e389fe8-984c-4049-9ba8-5b797cb17278 detached unknown 53687091200 15d\\n longhorn-system pvc-8df64e54-ecdb-4d4e-8bab-28d81e316b8b detached unknown 2147483648 15d\\n longhorn-system pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559 detached unknown 214748364800 11m\\n ```\\n\\n1. Set the `priority-class` setting with the following command:\\n\\n ```bash\\n kubectl patch -n longhorn-system settings.longhorn.io priority-class --patch \'{\\"value\\": \\"system-cluster-critical\\"}\' --type merge\\n ```\\n\\n Longhorn system-managed pods will restart and then you need to check if all the system-managed components have a priority class set:\\n\\n Get the value of the priority class `system-cluster-critical`:\\n ```bash\\n kubectl get priorityclass system-cluster-critical\\n ```\\n\\n Verify the output looks like this:\\n ```\\n NAME VALUE GLOBAL-DEFAULT AGE\\n system-cluster-critical 2000000000 false 15d\\n ```\\n\\n3. Use the following command to get pods\' priority in the `longhorn-system` namespace:\\n\\n ```bash\\n kubectl get pods -n longhorn-system -o custom-columns=\\"Name\\":metadata.name,\\"Priority\\":.spec.priority\\n ```\\n\\n4. Verify all system-managed components\' pods have the correct priority. System-managed components include:\\n\\n - `csi-attacher`\\n - `csi-provisioner`\\n - `csi-resizer`\\n - `csi-snapshotter`\\n - `engine-image-ei`\\n - `instance-manager-e`\\n - `instance-manager-r`\\n - `longhorn-csi-plugin`\\n\\n## Scale up vm-import-controller pods\\n\\nIf you scale down the `vm-import-controller` pods, you must scale it up again. \\n\\n1. Scale up the `vm-import-controller` pod. Run the command: \\n\\n ```bash\\n kubectl scale --replicas=1 deployment/harvester-vm-import-controller -n harvester-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/harvester-vm-import-controller scaled\\n Waiting for deployment \\"harvester-vm-import-controller\\" rollout to finish: 0 of 1 updated replicas are available...\\n deployment \\"harvester-vm-import-controller\\" successfully rolled out\\n ```\\n\\n2. 
Verify `vm-import-controller` is running using the following command:\\n ```bash\\n kubectl get pods --selector app.kubernetes.io/instance=vm-import-controller -A\\n ```\\n\\n A sample output looks like this; the pod\'s `STATUS` must be `Running`:\\n ```\\n NAMESPACE NAME READY STATUS RESTARTS AGE\\n harvester-system harvester-vm-import-controller-6bd8f44f55-m9k86 1/1 Running 0 4m53s\\n ```\\n\\n## Scale up monitoring pods\\n\\n1. Scale up the Prometheus deployment. Run the following command and wait for all Prometheus pods to roll out:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch \'{\\"spec\\": {\\"replicas\\": 1}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus\\n ```\\n\\n A sample output looks like:\\n ```\\n prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched\\n Waiting for 1 pods to be ready...\\n statefulset rolling update complete 1 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...\\n ```\\n\\n2. Scale up the AlertManager deployment. Run the following command and wait for all AlertManager pods to roll out:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch \'{\\"spec\\": {\\"replicas\\": 1}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched\\n Waiting for 1 pods to be ready...\\n statefulset rolling update complete 1 pods at revision alertmanager-rancher-monitoring-alertmanager-c8bd4466c...\\n ```\\n\\n3. Scale up the Grafana deployment. Run the following command and wait for all Grafana pods to roll out:\\n\\n ```bash\\n kubectl scale --replicas=1 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/rancher-monitoring-grafana scaled\\n Waiting for deployment \\"rancher-monitoring-grafana\\" rollout to finish: 0 of 1 updated replicas are available...\\n deployment \\"rancher-monitoring-grafana\\" successfully rolled out\\n ```\\n\\n## Start virtual machines\\n\\n1. Start a VM with the command:\\n\\n ```bash\\n virtctl start $name -n $namespace\\n ```\\n\\n Replace `$name` with the VM\'s name and `$namespace` with the VM\'s namespace. You can list all virtual machines with the command:\\n\\n ```bash\\n kubectl get vms -A\\n ```\\n\\n :::note\\n You can also start all VMs from the Harvester UI:\\n 1. Go to the **Virtual Machines** page.\\n 2. 
For each VM, select **\u22ee** > **Start**.\\n :::\\n\\n Alternatively, you can start all running VMs with the following command:\\n\\n ```bash\\n cat vmi-backup.json | jq -r \'.items[] | [.metadata.name, .metadata.namespace] | @tsv\' | while IFS=$\'\\\\t\' read -r name namespace; do\\n if [ -z \\"$name\\" ]; then\\n break\\n fi\\n echo \\"Start ${namespace}/${name}\\"\\n virtctl start $name -n $namespace || true\\n done\\n ```"},{"id":"package_your_own_toolbox_image","metadata":{"permalink":"/kb/package_your_own_toolbox_image","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-07-06/package_your_own_toolbox_image.md","source":"@site/kb/2023-07-06/package_your_own_toolbox_image.md","title":"Package your own Toolbox Image","description":"How to package your own toolbox image","date":"2023-07-06T00:00:00.000Z","formattedDate":"July 6, 2023","tags":[{"label":"debug","permalink":"/kb/tags/debug"},{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"container","permalink":"/kb/tags/container"}],"readingTime":1.655,"truncated":false,"authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"frontMatter":{"title":"Package your own Toolbox Image","description":"How to package your own toolbox image","slug":"package_your_own_toolbox_image","authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"tags":["debug","harvester","container"],"hide_table_of_contents":false},"prevItem":{"title":"Configure PriorityClass on Longhorn System Components","permalink":"/kb/configure_priority_class_longhorn"},"nextItem":{"title":"Scan and Repair Root Filesystem of VirtualMachine","permalink":"/kb/scan-and-repair-vm-root-filesystem"}},"content":"Harvester OS is designed as an immutable operating system, which means you cannot directly install additional packages on it. While there is a way to [install packages](https://docs.harvesterhci.io/dev/troubleshooting/os#how-can-i-install-packages-why-are-some-paths-read-only), it is strongly advised against doing so, as it may lead to system instability.\\n\\nIf you only want to debug with the system, the preferred way is to package the toolbox image with all the needed packages. \\n\\nThis article shares how to package your toolbox image and how to install any packages on the toolbox image that help you debug the system.\\n\\nFor example, if you want to analyze a storage performance issue, you can install `blktrace` on the toolbox image.\\n\\n\\n## Create a Dockerfile\\n\\n```bash\\nFROM opensuse/leap:15.4\\n\\n# Install blktrace\\nRUN zypper in -y \\\\\\n blktrace\\n\\nRUN zypper clean --all\\n```\\n\\n## Build the image and push\\n```bash\\n# assume you are in the directory of Dockerfile\\n$ docker build -t harvester/toolbox:dev .\\n.\\n.\\n.\\nnaming to docker.io/harvester/toolbox:dev ...\\n$ docker push harvester/toolbox:dev\\n.\\n.\\nd4b76d0683d4: Pushed \\na605baa225e2: Pushed \\n9e9058bdf63c: Layer already exists \\n```\\n\\nAfter you build and push the image, you can run the toolbox using this image to trace storage performance.\\n\\n## Run the toolbox\\n```bash\\n# use `privileged` flag only when you needed. 
blktrace need debugfs, so I add extra mountpoint.\\ndocker run -it --privileged -v /sys/kernel/debug/:/sys/kernel/debug/ --rm harvester/toolbox:dev bash\\n\\n# test blktrace\\n6ffa8eda3aaf:/ $ blktrace -d /dev/nvme0n1 -o - | blkparse -i -\\n259,0 10 3414 0.020814875 34084 Q WS 2414127984 + 8 [fio]\\n259,0 10 3415 0.020815190 34084 G WS 2414127984 + 8 [fio]\\n259,0 10 3416 0.020815989 34084 C WS 3206896544 + 8 [0]\\n259,0 10 3417 0.020816652 34084 C WS 2140319184 + 8 [0]\\n259,0 10 3418 0.020817992 34084 P N [fio]\\n259,0 10 3419 0.020818227 34084 U N [fio] 1\\n259,0 10 3420 0.020818437 34084 D WS 2414127984 + 8 [fio]\\n259,0 10 3421 0.020821826 34084 Q WS 1743934904 + 8 [fio]\\n259,0 10 3422 0.020822150 34084 G WS 1743934904 + 8 [fio]\\n\\n```"},{"id":"scan-and-repair-vm-root-filesystem","metadata":{"permalink":"/kb/scan-and-repair-vm-root-filesystem","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-02-01/scan_and_repair_filesystem.md","source":"@site/kb/2023-02-01/scan_and_repair_filesystem.md","title":"Scan and Repair Root Filesystem of VirtualMachine","description":"Scan and repair root filesystem of VM","date":"2023-02-01T00:00:00.000Z","formattedDate":"February 1, 2023","tags":[{"label":"storage","permalink":"/kb/tags/storage"},{"label":"longhorn","permalink":"/kb/tags/longhorn"},{"label":"root","permalink":"/kb/tags/root"},{"label":"filesystem","permalink":"/kb/tags/filesystem"}],"readingTime":3.37,"truncated":false,"authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"frontMatter":{"title":"Scan and Repair Root Filesystem of VirtualMachine","description":"Scan and repair root filesystem of VM","slug":"scan-and-repair-vm-root-filesystem","authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"tags":["storage","longhorn","root","filesystem"],"hide_table_of_contents":false},"prevItem":{"title":"Package your own Toolbox Image","permalink":"/kb/package_your_own_toolbox_image"},"nextItem":{"title":"Evicting Replicas From a Disk (the CLI way)","permalink":"/kb/evicting-replicas-from-a-disk-the-cli-way"}},"content":"In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: [Analysis: Potential Data/Filesystem Corruption](https://longhorn.io/kb/troubleshooting-volume-filesystem-corruption/#solution)). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM\'s filesystem and repair it if needed.\\n\\n\\n## Stop The VM And Backup Volume\\n\\nBefore you scan the filesystem, it is recommend you back up the volume first. For an example, refer to the following steps to stop the VM and backup the volume.\\n\\n- Find the target VM.\\n\\n![finding the target VM](./imgs/finding_the_target_vm.png)\\n\\n- Stop the target VM.\\n\\n![Stop the target VM](./imgs/stop_the_target_vm.png)\\n\\nThe target VM is stopped and the related volumes are detached. 
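\\n\\nIf you want to double-check from the command line as well, you can list the Longhorn volumes and confirm that the volumes backing the stopped VM are in the `detached` state, for example:\\n\\n```bash\\n# the volumes backing the stopped VM should show \\"detached\\" in the STATE column\\nkubectl get volumes.longhorn.io -n longhorn-system\\n```\\n\\n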
Now go to the Longhorn UI to backup this volume.\\n\\n- Enable `Developer Tools & Features` (Preferences -> Enable Developer Tools & Features).\\n\\n![Preferences then enable developer mode](./imgs/preferences_enable_developer_mode.png)\\n![Enable the developer mode](./imgs/enable_the_developer_mode.png)\\n\\n- Click the `\u22ee` button and select **Edit Config** to edit the config page of the VM.\\n\\n![goto edit config page of VM](./imgs/goto_vm_edit_config_page.png)\\n\\n- Go to the `Volumes` tab and select `Check volume details.`\\n\\n![link to longhorn volume page](./imgs/link_to_longhorn_volume.png)\\n\\n- Click the dropdown menu on the right side and select \'Attach\' to attach the volume again. \\n\\n![attach this volume again](./imgs/attach_this_volume_again.png)\\n\\n- Select the attached node. \\n\\n![choose the attached node](./imgs/choose_the_attached_node.png)\\n\\n- Check the volume attached under `Volume Details` and select `Take Snapshot` on this volume page.\\n\\n![take snapshot on volume page](./imgs/take_snapshot_on_volume_page.png)\\n\\n- Confirm that the snapshot is ready.\\n\\n![check the snapshot is ready](./imgs/check_the_snapshot_is_ready.png)\\n\\nNow that you completed the volume backup, you need to scan and repair the root filesystem.\\n\\n## Scanning the root filesystem and repairing\\n\\nThis section will introduce how to scan the filesystem (e.g., XFS, EXT4) using related tools.\\n\\nBefore scanning, you need to know the filesystem\'s device/partition.\\n\\n- Identify the filesystem\'s device by checking the major and minor numbers of that device.\\n\\n1. Obtain the major and minor numbers from the listed volume information.\\n \\n In the following example, the volume name is `pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58`.\\n ```\\n harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al\\n brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58\\n ```\\n The output indicates that the major and minor numbers are `8:0`.\\n \\n2. Obtain the device name from the output of the `lsblk` command.\\n ```\\n harvester-node-0:~ # lsblk\\n NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS\\n loop0 7:0 0 3G 1 loop /\\n sda 8:0 0 40G 0 disk\\n \u251c\u2500sda1 8:1 0 2M 0 part\\n \u251c\u2500sda2 8:2 0 20M 0 part\\n \u2514\u2500sda3 8:3 0 40G 0 part\\n ```\\n The output indicates that `8:0` are the major and minor numbers of the device named `sda`. Therefore, `/dev/sda` is related to the volume named `pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58`.\\n\\n- You should now know the filesystem\'s partition. 
In the example below, sda3 is the filesystem\'s partition.\\n- Use the Filesystem toolbox image to scan and repair.\\n\\n```\\n# docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash\\n```\\n\\nThen we try to scan with this target device.\\n\\n### XFS\\n\\nWhen scanning an XFS filesystem, use the `xfs_repair` command and specify the problematic partition of the device.\\n\\nIn the following example, `/dev/sda3` is the problematic partition.\\n```\\n# xfs_repair -n /dev/sda3\\n```\\n\\nTo repair the corrupted partition, run the following command.\\n\\n```\\n# xfs_repair /dev/sda3\\n```\\n\\n### EXT4\\n\\nWhen scanning a EXT4 filesystem, use the `e2fsck` command as follows, where the `/dev/sde1` is the problematic partition of the device.\\n\\n```\\n# e2fsck -f /dev/sde1\\n```\\n\\nTo repair the corrupted partition, run the following command.\\n\\n```\\n# e2fsck -fp /dev/sde1\\n```\\n\\n\\nAfter using the \'e2fsck\' command, you should also see logs related to scanning and repairing the partition. Scanning and repairing the corrupted partition is successful if there are no errors in these logs. \\n\\n\\n## Detach and Start VM again.\\n\\nAfter the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.\\n\\n- Detach the volume from the Longhorn UI.\\n\\n![detach volume on longhorn UI](./imgs/detach_volume.png)\\n\\n- Start the related VM again from the Harvester UI.\\n\\n![Start VM again](./imgs/start_vm_again.png)\\n\\nYour VM should now work normally."},{"id":"evicting-replicas-from-a-disk-the-cli-way","metadata":{"permalink":"/kb/evicting-replicas-from-a-disk-the-cli-way","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-01-12/evict_replicas_from_a_disk.md","source":"@site/kb/2023-01-12/evict_replicas_from_a_disk.md","title":"Evicting Replicas From a Disk (the CLI way)","description":"Evicting replicas from a disk (the CLI way)","date":"2023-01-12T00:00:00.000Z","formattedDate":"January 12, 2023","tags":[{"label":"storage","permalink":"/kb/tags/storage"},{"label":"longhorn","permalink":"/kb/tags/longhorn"},{"label":"disk","permalink":"/kb/tags/disk"}],"readingTime":1.935,"truncated":false,"authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"frontMatter":{"title":"Evicting Replicas From a Disk (the CLI way)","description":"Evicting replicas from a disk (the CLI way)","slug":"evicting-replicas-from-a-disk-the-cli-way","authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"tags":["storage","longhorn","disk"],"hide_table_of_contents":false},"prevItem":{"title":"Scan and Repair Root Filesystem of VirtualMachine","permalink":"/kb/scan-and-repair-vm-root-filesystem"},"nextItem":{"title":"NIC Naming Scheme","permalink":"/kb/nic-naming-scheme"}},"content":"Harvester replicates volumes data across disks in a cluster. Before removing a disk, the user needs to evict replicas on the disk to other disks to preserve the volumes\' configured availability. 
For more information about eviction in Longhorn, please check [Evicting Replicas on Disabled Disks or Nodes](https://longhorn.io/docs/1.3.2/volumes-and-nodes/disks-or-nodes-eviction/).\\n\\n## Preparation\\n\\nThis document describes how to evict Longhorn disks using the `kubectl` command. Before that, users must ensure the environment is set up correctly.\\nThere are two recommended ways to do this:\\n\\n1. Log in to any management node and switch to root (`sudo -i`).\\n1. Download Kubeconfig file and use it locally\\n - Install `kubectl` and `yq` program manually.\\n - Open Harvester GUI, click `support` at the bottom left of the page and click `Download KubeConfig` to download the Kubeconfig file.\\n - Set the Kubeconfig file\'s path to `KUBECONFIG` environment variable. For example, `export KUBECONFIG=/path/to/kubeconfig`.\\n\\n\\n## Evicting replicas from a disk\\n\\n1. List Longhorn nodes (names are identical to Kubernetes nodes):\\n\\n ```\\n kubectl get -n longhorn-system nodes.longhorn.io\\n ```\\n\\n Sample output:\\n\\n ```\\n NAME READY ALLOWSCHEDULING SCHEDULABLE AGE\\n node1 True true True 24d\\n node2 True true True 24d\\n node3 True true True 24d\\n ```\\n\\n1. List disks on a node. Assume we want to evict replicas of a disk on `node1`:\\n\\n ```\\n kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e \'.spec.disks\'\\n ```\\n\\n Sample output:\\n\\n ```\\n default-disk-ed7af10f5b8356be:\\n allowScheduling: true\\n evictionRequested: false\\n path: /var/lib/harvester/defaultdisk\\n storageReserved: 36900254515\\n tags: []\\n ```\\n\\n1. Assume disk `default-disk-ed7af10f5b8356be` is the target we want to evict replicas out of.\\n\\n Edit the node:\\n ```\\n kubectl edit -n longhorn-system nodes.longhorn.io node1 \\n ```\\n\\n Update these two fields and save:\\n - `spec.disks..allowScheduling` to `false`\\n - `spec.disks..evictionRequested` to `true`\\n\\n Sample editing:\\n\\n ```\\n default-disk-ed7af10f5b8356be:\\n allowScheduling: false\\n evictionRequested: true\\n path: /var/lib/harvester/defaultdisk\\n storageReserved: 36900254515\\n tags: []\\n ```\\n\\n1. 
Wait for all replicas on the disk to be evicted.\\n\\n Get current scheduled replicas on the disk:\\n ```\\n kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e \'.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica\'\\n ```\\n\\n Sample output:\\n ```\\n pvc-86d3d212-d674-4c64-b69b-4a2eb1df2272-r-7b422db7: 5368709120\\n pvc-b06f0b09-f30c-4936-8a2a-425b993dd6cb-r-bb0fa6b3: 2147483648\\n pvc-b844bcc6-3b06-4367-a136-3909251cb560-r-08d1ab3c: 53687091200\\n pvc-ea6e0dff-f446-4a38-916a-b3bea522f51c-r-193ca5c6: 10737418240\\n ```\\n\\n Run the command repeatedly, and the output should eventually become an empty map:\\n ```\\n {}\\n ```\\n\\n This means Longhorn evicts replicas on the disk to other disks.\\n\\n :::note\\n \\n If a replica always stays in a disk, please open the [Longhorn GUI](https://docs.harvesterhci.io/v1.1/troubleshooting/harvester#access-embedded-rancher-and-longhorn-dashboards) and check if there is free space on other disks.\\n :::"},{"id":"nic-naming-scheme","metadata":{"permalink":"/kb/nic-naming-scheme","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2022-04-06/nic_naming_scheme.md","source":"@site/kb/2022-04-06/nic_naming_scheme.md","title":"NIC Naming Scheme","description":"NIC Naming Scheme changed after upgrading to v1.0.1","date":"2022-04-06T00:00:00.000Z","formattedDate":"April 6, 2022","tags":[{"label":"network","permalink":"/kb/tags/network"}],"readingTime":1.825,"truncated":false,"authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"frontMatter":{"title":"NIC Naming Scheme","descripion":"NIC Naming Scheme Change","slug":"nic-naming-scheme","authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"tags":["network"],"hide_table_of_contents":false},"prevItem":{"title":"Evicting Replicas From a Disk (the CLI way)","permalink":"/kb/evicting-replicas-from-a-disk-the-cli-way"},"nextItem":{"title":"Multiple NICs VM Connectivity","permalink":"/kb/multiple-nics-vm-connectivity"}},"content":"## NIC Naming Scheme changed after upgrading to v1.0.1\\n\\n`systemd` in OpenSUSE Leap 15.3 which is the base OS of Harvester is upgraded to `246.16-150300.7.39.1`. In this version, `systemd` will enable additional naming scheme `sle15-sp3` which is `v238` with `bridge_no_slot`. When there is a PCI bridge associated with NIC, `systemd` will never generate `ID_NET_NAME_SLOT` and naming policy in `/usr/lib/systemd/network/99-default.link` will fallback to `ID_NET_NAME_PATH`. According to this change, NIC names might be changed in your Harvester nodes during the upgrade process from `v1.0.0` to `v1.0.1-rc1` or above, and it will cause network issues that are associated with NIC names.\\n\\n## Effect Settings and Workaround\\n\\n### Startup Network Configuration\\n\\nNIC name changes will need to update the name in `/oem/99_custom.yaml`. 
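\\n\\nIf you are unsure which predictable name `udev` would assign to a NIC, you can query the `net_id` builtin before editing anything (a quick, read-only check; `eth0` below is only a placeholder for your interface, and the exact fields printed depend on the `systemd` version):\\n\\n```bash\\n# prints ID_NET_NAME_PATH / ID_NET_NAME_SLOT and related naming properties for the NIC\\nudevadm test-builtin net_id /sys/class/net/eth0\\n```\\n\\n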
You could use [migration script](https://github.com/harvester/upgrade-helpers/blob/main/hack/udev_v238_sle15-sp3.py) to change the NIC names which are associated with a PCI bridge.\\n\\n:::tip\\nYou could find an identical machine to test naming changes before applying the configuration to production machines\\n:::\\n\\nYou could simply execute the script with root account in `v1.0.0` via\\n```bash\\n# python3 udev_v238_sle15-sp3.py\\n```\\n\\nIt will output the patched configuration to the screen and you could compare it to the original one to ensure there is no exception. (e.g. We could use `vimdiff` to check the configuration)\\n```bash\\n# python3 udev_v238_sle15-spe3.py > /oem/test\\n# vimdiff /oem/test /oem/99_custom.yaml\\n```\\n\\nAfter checking the result, we could execute the script with `--really-want-to-do` to override the configuration. It will also back up the original configuration file with a timestamp before patching it.\\n```bash\\n# python3 udev_v238_sle15-sp3.py --really-want-to-do\\n```\\n\\n### Harvester VLAN Network Configuration\\n\\nIf your VLAN network is associated with NIC name directly without `bonding`, you will need to migrate `ClusterNetwork` and `NodeNetwork` with the previous section together.\\n\\n:::note\\nIf your VLAN network is associated with the `bonding` name in `/oem/99_custom.yaml`, you could skip this section.\\n:::\\n\\n#### Modify ClusterNetworks\\n\\nYou need to modify `ClusterNetworks` via \\n```bash\\n$ kubectl edit clusternetworks vlan\\n```\\nsearch this pattern\\n```yaml\\nconfig:\\n defaultPhysicalNIC: \\n```\\nand change to new NIC name\\n\\n#### Modify NodeNetworks\\n\\nYou need to modify `NodeNetworks` via\\n```bash\\n$ kubectl edit nodenetworks -vlan\\n```\\nsearch this pattern\\n```yaml\\nspec:\\n nic: \\n```\\nand change to new NIC name"},{"id":"multiple-nics-vm-connectivity","metadata":{"permalink":"/kb/multiple-nics-vm-connectivity","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2022-03-10/multiple_nics_vm_connectivity.md","source":"@site/kb/2022-03-10/multiple_nics_vm_connectivity.md","title":"Multiple NICs VM Connectivity","description":"What is the default behavior of a VM with multiple NICs","date":"2022-03-10T00:00:00.000Z","formattedDate":"March 10, 2022","tags":[{"label":"vm","permalink":"/kb/tags/vm"},{"label":"network","permalink":"/kb/tags/network"}],"readingTime":3.955,"truncated":false,"authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"frontMatter":{"title":"Multiple NICs VM Connectivity","descripion":"How to deal VMs with multiple NICs in Harvester","slug":"multiple-nics-vm-connectivity","authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"tags":["vm","network"],"hide_table_of_contents":false},"prevItem":{"title":"NIC Naming Scheme","permalink":"/kb/nic-naming-scheme"},"nextItem":{"title":"VM Scheduling","permalink":"/kb/vm-scheduling"}},"content":"## What is the default behavior of a VM with multiple NICs\\n\\nIn [some scenarios](https://github.com/harvester/harvester/issues/1059), you\'ll setup two or more NICs in your VM to serve different networking purposes. If all networks are setup by default with DHCP, you might get random connectivity issues. 
And while it might get fixed after rebooting the VM, it still will lose connection randomly after some period.\\n\\n## How-to identify connectivity issues\\n\\nIn a Linux VM, you can use commands from the `iproute2` package to identify the default route.\\n\\nIn your VM, execute the following command:\\n```bash\\nip route show default\\n```\\n:::tip\\nIf you get the `access denied` error, please run the command using `sudo`\\n:::\\n \\nThe output of this command will only show the default route with the gateway and VM IP of the primary network interface (`eth0` in the example below).\\n```\\ndefault via dev eth0 proto dhcp src metric 100\\n```\\n\\nHere is the full example:\\n```\\n$ ip route show default\\ndefault via 192.168.0.254 dev eth0 proto dhcp src 192.168.0.100 metric 100\\n```\\n\\nHowever, if the issue covered in this KB occurs, you\'ll only be able to connect to the VM via the VNC or serial console.\\n\\nOnce connected, you can run again the same command as before:\\n```bash\\n$ ip route show default\\n```\\n\\nHowever, this time you\'ll get a default route with an incorrect gateway IP.\\nFor example:\\n```\\ndefault via dev eth0 proto dhcp src metric 100\\n```\\n\\n## Why do connectivity issues occur randomly\\n\\nIn a standard setup, cloud-based VMs typically use DHCP for their NICs configuration. It will set an IP and a gateway for each NIC. Lastly, a default route to the gateway IP will also be added, so you can use its IP to connect to the VM.\\n\\nHowever, Linux distributions start multiple DHCP clients at the same time and do not have a **priority** system. This means that if you have two or more NICs configured with DHCP, the client will enter a **race condition** to configure the default route. And depending on the currently running Linux distribution DHCP script, there is no guarantee which default route will be configured.\\n\\nAs the default route might change in every DHCP renewing process or after every OS reboot, this will create network connectivity issues.\\n\\n## How to avoid the random connectivity issues\\n\\nYou can easily avoid these connectivity issues by having only one NIC attached to the VM and having only one IP and one gateway configured.\\n\\nHowever, for VMs in more complex infrastructures, it is often not possible to use just one NIC. For example, if your infrastructure has a storage network and a service network. For security reasons, the storage network will be isolated from the service network and have a separate subnet. In this case, you must have two NICs to connect to both the service and storage networks.\\n\\nYou can choose a solution below that meets your requirements and security policy.\\n\\n### Disable DHCP on secondary NIC\\n\\nAs mentioned above, the problem is caused by a `race condition` between two DHCP clients. One solution to avoid this problem is to disable DHCP for all NICs and configure them with static IPs only. Likewise, you can configure the secondary NIC with a static IP and keep the primary NIC enabled with DHCP.\\n\\n1. To configure the primary NIC with a static IP (`eth0` in this example), you can edit the file `/etc/sysconfig/network/ifcfg-eth0` with the following values:\\n\\n```\\nBOOTPROTO=\'static\'\\nIPADDR=\'192.168.0.100\'\\nNETMASK=\'255.255.255.0\'\\n```\\n\\nAlternatively, if you want to reserve the primary NIC using DHCP (`eth0` in this example), use the following values instead:\\n\\n```\\nBOOTPROTO=\'dhcp\'\\nDHCLIENT_SET_DEFAULT_ROUTE=\'yes\'\\n```\\n\\n\\n2. 
You need to configure the default route by editing the file `/etc/sysconfig/network/ifroute-eth0` (if you configured the primary NIC using DHCP, skip this step):\\n\\n\\n```\\n# Destination Dummy/Gateway Netmask Interface\\ndefault 192.168.0.254 - eth0\\n```\\n\\n:::warning\\nDo not put other default route for your secondary NIC\\n:::\\n \\n3. Finally, configure a static IP for the secondary NIC by editing the file `/etc/sysconfig/network/ifcfg-eth1`:\\n\\n```\\nBOOTPROTO=\'static\'\\nIPADDR=\'10.0.0.100\'\\nNETMASK=\'255.255.255.0\'\\n```\\n\\n#### Cloud-Init config\\n\\n```yaml\\nnetwork:\\n version: 1\\n config:\\n - type: physical\\n name: eth0\\n subnets:\\n - type: dhcp\\n - type: physical\\n name: eth1\\n subnets:\\n - type: static\\n address: 10.0.0.100/24\\n```\\n \\n### Disable secondary NIC default route from DHCP\\n\\nIf your secondary NIC requires to get its IP from DHCP, you\'ll need to disable the secondary NIC default route configuration.\\n\\n1. Confirm that the primary NIC configures its default route in the file `/etc/sysconfig/network/ifcfg-eth0`:\\n\\n```\\nBOOTPROTO=\'dhcp\'\\nDHCLIENT_SET_DEFAULT_ROUTE=\'yes\'\\n```\\n\\n2. Disable the secondary NIC default route configuration by editing the file `/etc/sysconfig/network/ifcfg-eth1`:\\n\\n```\\nBOOTPROTO=\'dhcp\'\\nDHCLIENT_SET_DEFAULT_ROUTE=\'no\'\\n```\\n\\n#### Cloud-Init config\\n\\nThis solution is not available in Cloud-Init. Cloud-Init didn\'t allow any option for DHCP."},{"id":"vm-scheduling","metadata":{"permalink":"/kb/vm-scheduling","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2022-03-07/vm-scheduling.md","source":"@site/kb/2022-03-07/vm-scheduling.md","title":"VM Scheduling","description":"How does Harvester schedule VMs?","date":"2022-03-07T00:00:00.000Z","formattedDate":"March 7, 2022","tags":[{"label":"vm","permalink":"/kb/tags/vm"},{"label":"scheduling","permalink":"/kb/tags/scheduling"}],"readingTime":15.44,"truncated":false,"authors":[{"name":"PoAn Yang","title":"Software Engineer","url":"https://github.com/FrankYang0529","image_url":"https://github.com/FrankYang0529.png","imageURL":"https://github.com/FrankYang0529.png"}],"frontMatter":{"title":"VM Scheduling","description":"How does Harvester schedule VMs?","slug":"vm-scheduling","authors":[{"name":"PoAn Yang","title":"Software Engineer","url":"https://github.com/FrankYang0529","image_url":"https://github.com/FrankYang0529.png","imageURL":"https://github.com/FrankYang0529.png"}],"tags":["vm","scheduling"],"hide_table_of_contents":false},"prevItem":{"title":"Multiple NICs VM Connectivity","permalink":"/kb/multiple-nics-vm-connectivity"}},"content":"## How does Harvester schedule a VM?\\n\\nHarvester doesn\'t directly schedule a VM in Kubernetes, it relies on [KubeVirt](http://kubevirt.io/) to create the custom resource `VirtualMachine`. When the request to create a new VM is sent, a `VirtualMachineInstance` object is created and it creates the corresponding `Pod`.\\n\\nThe whole VM creation processt leverages `kube-scheduler`, which allows Harvester to use `nodeSelector`, `affinity`, and resources request/limitation to influence where a VM will be deployed.\\n\\n## How does kube-scheduler decide where to deploy a VM?\\n\\nFirst, `kube-scheduler` finds Nodes available to run a pod. 
After that, `kube-scheduler` scores each available Node by a list of [plugins](https://github.com/kubernetes/kubernetes/tree/v1.22.7/pkg/scheduler/framework/plugins) like [ImageLocality](https://github.com/kubernetes/kubernetes/blob/v1.22.7/pkg/scheduler/framework/plugins/imagelocality/image_locality.go), [InterPodAffinity](https://github.com/kubernetes/kubernetes/tree/v1.22.7/pkg/scheduler/framework/plugins/interpodaffinity), [NodeAffinity](https://github.com/kubernetes/kubernetes/tree/v1.22.7/pkg/scheduler/framework/plugins/nodeaffinity), etc. \\n\\nFinally, `kube-scheduler` calculates the scores from the plugins results for each Node, and select the Node with the highest score to deploy the Pod.\\n\\nFor example, let\'s say we have a three nodes Harvester cluster with 6 cores CPU and 16G RAM each, and we want to deploy a VM with 1 CPU and 1G RAM (without resources overcommit). \\n\\n`kube-scheduler` will summarize the scores, as displayed in _Table 1_ below, and will select the node with the highest score, `harvester-node-2` in this case, to deploy the VM.\\n\\n
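\\nIf you want to confirm where a VM actually landed, one simple check is to look at its `virt-launcher` pod with `-o wide` (adjust the namespace to wherever the VM runs):\\n\\n```bash\\n# the NODE column shows the node that kube-scheduler selected for the VM pod\\nkubectl get pods -n default -o wide | grep virt-launcher\\n```\\n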
\\n kube-scheduler logs\\n\\n```\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,\\n\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=4\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=34\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=37\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" 
score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=45\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=46\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-0\\" score=1000358\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-1\\" score=1000433\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-2\\" score=1000437\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm-without-overcommit-75q9b\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm-without-overcommit-75q9b\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-2\\"\\n```\\n
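\\nEach node\'s final score is simply the sum of its per-plugin scores. For example, `harvester-node-0` gets 54 + 0 + 4 + 0 + 1000000 + 200 + 100 + 0 = 1000358, which matches the totals in _Table 1_ below.\\n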
\\n\\n**Table 1 - kube-scheduler scores example**\\n\\n| | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-------------------------------:|:----------------:|:----------------:|:----------------:|\\n| ImageLocality | 54 | 54 | 54 |\\n| InterPodAffinity | 0 | 0 | 0 |\\n| NodeResourcesLeastAllocated | 4 | 34 | 37 |\\n| NodeAffinity | 0 | 0 | 0 |\\n| NodePreferAvoidPods | 1000000 | 1000000 | 1000000 |\\n| PodTopologySpread | 200 | 200 | 200 |\\n| TaintToleration | 100 | 100 | 100 |\\n| NodeResourcesBalancedAllocation | 0 | 45 | 46 |\\n| Total | 1000358 | 1000433 | 1000437 |\\n\\n## Why VMs are distributed unevenly with overcommit?\\n\\nWith resources overcommit, Harvester modifies the resources request. By default, the `overcommit` configuration is `{\\"cpu\\": 1600, \\"memory\\": 150, \\"storage\\": 200}`. This means that if we request a VM with 1 CPU and 1G RAM, its `resources.requests.cpu` will become `62m`. \\n\\n!!! note\\n The unit suffix `m` stands for \\"thousandth of a core.\\"\\n\\nTo explain it, let\'s take the case of CPU overcommit. The default value of 1 CPU is equal to 1000m CPU, and with the default overcommit configuration of `\\"cpu\\": 1600`, the CPU resource will be 16x smaller. Here is the calculation: `1000m * 100 / 1600 = 62m`.\\n\\nNow, we can see how overcommitting influences `kube-scheduler` scores.\\n\\nIn this example, we use a three nodes Harvester cluster with 6 cores and 16G RAM each. We will deploy two VMs with 1 CPU and 1G RAM, and we will compare the scores for both cases of \\"with-overcommit\\" and \\"without-overcommit\\" resources. \\n\\nThe results of both tables _Table 2_ and _Table 3_ can be explained as follow:\\n\\nIn the \\"with-overcommit\\" case, both VMs are deployed on `harvester-node-2`, however in the \\"without-overcommit\\" case, the VM1 is deployed on `harvester-node-2`, and VM2 is deployed on `harvester-node-1`. \\n\\nIf we look at the detailed scores, we\'ll see a variation of `Total Score` for `harvester-node-2` from `1000459` to `1000461` in the \\"with-overcommit\\" case, and `1000437` to `1000382` in the \\"without-overcommit case\\". It\'s because resources overcommit influences `request-cpu` and `request-memory`. \\n\\nIn the \\"with-overcommit\\" case, the `request-cpu` changes from `4412m` to `4474m`. The difference between the two numbers is `62m`, which is what we calculated above. However, in the \\"without-overcommit\\" case, we send **real** requests to `kube-scheduler`, so the `request-cpu` changes from `5350m` to `6350m`.\\n\\nFinally, since most plugins give the same scores for each node except `NodeResourcesBalancedAllocation` and `NodeResourcesLeastAllocated`, we\'ll see a difference of these two scores for each node.\\n\\nFrom the results, we can see the overcommit feature influences the final score of each Node, so VMs are distributed unevenly. Although the `harvester-node-2` score for VM 2 is higher than VM 1, it\'s not always increasing. In _Table 4_, we keep deploying VM with 1 CPU and 1G RAM, and we can see the score of `harvester-node-2` starts decreasing from 11th VM. The behavior of `kube-scheduler` depends on your cluster resources and the workload you deployed.\\n\\n
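\\nTo see what the overcommitted request looks like on a real Pod, you can print the resource requests of the VM\'s `virt-launcher` pod (a minimal sketch; the pod name below comes from the logs in this example, so replace it with your own):\\n\\n```bash\\n# with the default overcommit configuration, a 1 CPU / 1G VM is requested as roughly 62m CPU\\nkubectl get pod virt-launcher-vm1-with-overcommit-ljlmq -n default -o jsonpath=\'{.spec.containers[*].resources.requests}\'\\n```\\n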
\\n kube-scheduler logs for vm1-with-overcommit\\n\\n```\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 59,\\n\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 46,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=5\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=43\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=46\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" 
score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=58\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=59\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-0\\" score=1000359\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-1\\" score=1000455\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-2\\" score=1000459\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-ljlmq\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-ljlmq\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-2\\"\\n```\\n
\\n\\n
\\n kube-scheduler logs for vm2-with-overcommit\\n\\n```\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 64,\\n\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 43,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" 
score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=58\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=64\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=5\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=43\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=43\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-0\\" score=1000359\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-1\\" score=1000455\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-2\\" score=1000461\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-with-overcommit-pwrx4\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-with-overcommit-pwrx4\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-2\\"\\n```\\n
\\n\\n
\\n kube-scheduler logs for vm1-without-overcommit\\n\\n```\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,\\n\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=4\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=34\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=37\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" 
score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=45\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=46\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-0\\" score=1000358\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-1\\" score=1000433\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-2\\" score=1000437\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-6xqmq\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-6xqmq\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-2\\"\\n```\\n
\\n\\n
\\n kube-scheduler logs for vm2-without-overcommit\\n\\n```\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 0,\\n\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 28,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=45\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" 
plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=4\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=34\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=28\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-0\\" score=1000358\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-1\\" score=1000433\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-2\\" score=1000382\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-without-overcommit-mf5vk\\", node \\"harvester-node-1\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-without-overcommit-mf5vk\\", node \\"harvester-node-1\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-1\\"\\n```\\n
\\n\\n**Table 2 - With Overcommit**\\n\\n| VM 1 / VM 2 | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-------------------------------------:|--------------------------:|------------------------:|------------------------:|\\n| request-cpu (m) | 9022 / 9022 | 4622 / 4622 | **4412** / **4474** |\\n| request-memory (bytes) | 14807289856 / 14807289856 | 5992960000 / 5992960000 | **5581918208** / **6476701696** |\\n| NodeResourcesBalancedAllocation Score | 0 / 0 | 58 / 58 | **59** / **64** |\\n| NodeResourcesLeastAllocated Score | 5 / 5 | 43 / 43 | **46** / **43** |\\n| Other Scores | 1000354 / 1000354 | 1000354 / 1000354 | 1000354 / 1000354 |\\n| Total Score | 1000359 / 1000359 | 1000455 / 1000455 | **1000459** / **1000461** |\\n\\n**Table 3 - Without Overcommit**\\n\\n| VM 1 / VM 2 | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-------------------------------------:|--------------------------:|------------------------:|------------------------:|\\n| request-cpu (m) | 9960 / 9960 | 5560 / **5560** | **5350** / 6350 |\\n| request-memory (bytes) | 15166603264 / 15166603264 | 6352273408 / **6352273408** | **5941231616** / 7195328512 |\\n| NodeResourcesBalancedAllocation Score | 0 / 0 | 45 / **45** | **46** / 0 |\\n| NodeResourcesLeastAllocated Score | 4 / 4 | 34 / **34** | **37** / 28 |\\n| Other Scores | 1000354 / 1000354 | 1000354 / **1000354** | **1000354** / 1000354 |\\n| Total Score | 1000358 / 1000358 | 1000433 / **1000433** | **1000437** / 1000382 |\\n\\n**Table 4**\\n\\n| Score | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-----:|-----------------:|-----------------:|-----------------:|\\n| VM 1 | 1000359 | 1000455 | 1000459 |\\n| VM 2 | 1000359 | 1000455 | 1000461 |\\n| VM 3 | 1000359 | 1000455 | 1000462 |\\n| VM 4 | 1000359 | 1000455 | 1000462 |\\n| VM 5 | 1000359 | 1000455 | 1000463 |\\n| VM 6 | 1000359 | 1000455 | 1000465 |\\n| VM 7 | 1000359 | 1000455 | 1000466 |\\n| VM 8 | 1000359 | 1000455 | 1000467 |\\n| VM 9 | 1000359 | 1000455 | 1000469 |\\n| VM 10 | 1000359 | 1000455 | 1000469 |\\n| VM 11 | 1000359 | 1000455 | **1000465** |\\n| VM 12 | 1000359 | 1000455 | **1000457** |\\n\\n\\n## How to avoid uneven distribution of VMs?\\n\\nThere are many plugins in `kube-scheduler` that we can use to influence the scores. For example, we can add a `podAntiAffinity` rule (scored by the `InterPodAffinity` plugin) to avoid VMs with the same labels being deployed on the same node.\\n\\n```\\n  affinity:\\n    podAntiAffinity:\\n      preferredDuringSchedulingIgnoredDuringExecution:\\n      - podAffinityTerm:\\n          labelSelector:\\n            matchExpressions:\\n            - key: harvesterhci.io/creator\\n              operator: Exists\\n          topologyKey: kubernetes.io/hostname\\n        weight: 100\\n```\\n\\n## How to see scores in kube-scheduler?\\n\\n`kube-scheduler` is deployed as a static pod in Harvester. Its manifest file is located at `/var/lib/rancher/rke2/agent/pod-manifests/kube-scheduler.yaml` on each Management Node. 
We can add `- --v=10` to the `kube-scheduler` container to show score logs.\\n\\n```\\nkind: Pod\\nmetadata:\\n labels:\\n component: kube-scheduler\\n tier: control-plane\\n name: kube-scheduler\\n namespace: kube-system\\nspec:\\n containers:\\n - command:\\n - kube-scheduler\\n # ...\\n - --v=10\\n```"}]}')}}]); \ No newline at end of file +"use strict";(self.webpackChunkharvesterhci_io=self.webpackChunkharvesterhci_io||[]).push([[4950],{3287:function(e){e.exports=JSON.parse('{"blogPosts":[{"id":"calculation_of_resource_metrics_in_harvester","metadata":{"permalink":"/kb/calculation_of_resource_metrics_in_harvester","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2024-01-23/harvester_resource_metrics_calculation.md","source":"@site/kb/2024-01-23/harvester_resource_metrics_calculation.md","title":"Calculation of Resource Metrics in Harvester","description":"Understand how resource metrics are calculated.","date":"2024-01-23T00:00:00.000Z","formattedDate":"January 23, 2024","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"resource metrics","permalink":"/kb/tags/resource-metrics"},{"label":"reserved resource","permalink":"/kb/tags/reserved-resource"},{"label":"calculation","permalink":"/kb/tags/calculation"}],"readingTime":2.835,"truncated":false,"authors":[{"name":"Jian Wang","title":"Staff Software Engineer","url":"https://github.com/w13915984028","image_url":"https://github.com/w13915984028.png","imageURL":"https://github.com/w13915984028.png"}],"frontMatter":{"title":"Calculation of Resource Metrics in Harvester","description":"Understand how resource metrics are calculated.","slug":"calculation_of_resource_metrics_in_harvester","authors":[{"name":"Jian Wang","title":"Staff Software Engineer","url":"https://github.com/w13915984028","image_url":"https://github.com/w13915984028.png","imageURL":"https://github.com/w13915984028.png"}],"tags":["harvester","resource metrics","reserved resource","calculation"],"hide_table_of_contents":false},"nextItem":{"title":"Best Practices for Optimizing Longhorn Disk Performance","permalink":"/kb/best_practices_for_optimizing_longhorn_disk_performance"}},"content":"Harvester calculates the resource metrics using data that is dynamically collected from the system. 
Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.\\n\\nYou can view resource-related metrics on the Harvester UI.\\n\\n- **Hosts** screen: Displays host-level metrics\\n\\n ![host level resources metrics](./imgs/host-resource-usage.png)\\n\\n- **Dashboard** screen: Displays cluster-level metrics\\n\\n ![cluster level resources metrics](./imgs/cluster-resource-usage.png)\\n\\n## CPU and Memory\\n\\nThe following sections describe the data sources and calculation methods for CPU and memory resources.\\n\\n- Resource capacity: Baseline data\\n- Resource usage: Data source for the **Used** field on the **Hosts** screen\\n- Resource reservation: Data source for the **Reserved** field on the **Hosts** screen\\n\\n### Resource Capacity \\n\\nIn Kubernetes, a `Node` object is created for each host.\\n\\nThe `.status.allocatable.cpu` and `.status.allocatable.memory` represent the available CPU and Memory resources of a host.\\n\\n```\\n# kubectl get nodes -A -oyaml\\napiVersion: v1\\nitems:\\n- apiVersion: v1\\n kind: Node\\n metadata:\\n..\\n management.cattle.io/pod-limits: \'{\\"cpu\\":\\"12715m\\",\\"devices.kubevirt.io/kvm\\":\\"1\\",\\"devices.kubevirt.io/tun\\":\\"1\\",\\"devices.kubevirt.io/vhost-net\\":\\"1\\",\\"memory\\":\\"17104951040\\"}\'\\n management.cattle.io/pod-requests: \'{\\"cpu\\":\\"5657m\\",\\"devices.kubevirt.io/kvm\\":\\"1\\",\\"devices.kubevirt.io/tun\\":\\"1\\",\\"devices.kubevirt.io/vhost-net\\":\\"1\\",\\"ephemeral-storage\\":\\"50M\\",\\"memory\\":\\"9155862208\\",\\"pods\\":\\"78\\"}\'\\n node.alpha.kubernetes.io/ttl: \\"0\\"\\n..\\n name: harv41\\n resourceVersion: \\"2170215\\"\\n uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67\\n spec:\\n podCIDR: 10.52.0.0/24\\n podCIDRs:\\n - 10.52.0.0/24\\n providerID: rke2://harv41\\n status:\\n addresses:\\n - address: 192.168.122.141\\n type: InternalIP\\n - address: harv41\\n type: Hostname\\n allocatable:\\n cpu: \\"10\\"\\n devices.kubevirt.io/kvm: 1k\\n devices.kubevirt.io/tun: 1k\\n devices.kubevirt.io/vhost-net: 1k\\n ephemeral-storage: \\"149527126718\\"\\n hugepages-1Gi: \\"0\\"\\n hugepages-2Mi: \\"0\\"\\n memory: 20464216Ki\\n pods: \\"200\\"\\n capacity:\\n cpu: \\"10\\"\\n devices.kubevirt.io/kvm: 1k\\n devices.kubevirt.io/tun: 1k\\n devices.kubevirt.io/vhost-net: 1k\\n ephemeral-storage: 153707984Ki\\n hugepages-1Gi: \\"0\\"\\n hugepages-2Mi: \\"0\\"\\n memory: 20464216Ki\\n pods: \\"200\\"\\n```\\n\\n### Resource Usage\\n\\nCPU and memory usage data is continuously collected and stored in the `NodeMetrics` object. 
Harvester reads the data from `usage.cpu` and `usage.memory`.\\n\\n```\\n# kubectl get NodeMetrics -A -oyaml\\napiVersion: v1\\nitems:\\n- apiVersion: metrics.k8s.io/v1beta1\\n kind: NodeMetrics\\n metadata:\\n...\\n name: harv41\\n timestamp: \\"2024-01-23T12:04:44Z\\"\\n usage:\\n cpu: 891736742n\\n memory: 9845008Ki\\n window: 10.149s\\n```\\n\\n### Resource Reservation\\n\\nHarvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the `NodeMetrics` object.\\n\\n```\\n management.cattle.io/pod-limits: \'{\\"cpu\\":\\"12715m\\",...,\\"memory\\":\\"17104951040\\"}\'\\n management.cattle.io/pod-requests: \'{\\"cpu\\":\\"5657m\\",...,\\"memory\\":\\"9155862208\\"}\'\\n```\\n\\nFor more information, see [Requests and Limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits) in the Kubernetes documentation.\\n\\n## Storage\\n\\nLonghorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.\\n\\n### Reserved Storage in Longhorn\\n\\nLonghorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is \\"30\\". For more information, see [Storage Reserved Percentage For Default Disk](https://longhorn.io/docs/1.5.3/references/settings/#storage-reserved-percentage-for-default-disk) in the Longhorn documentation.\\n\\nDepending on the disk size, you can modify the default value using the [embedded Longhorn UI](https://docs.harvesterhci.io/v1.2/troubleshooting/harvester/#access-embedded-rancher-and-longhorn-dashboards).\\n\\n:::note\\n\\nBefore changing the settings, read the Longhorn documentation carefully.\\n\\n:::\\n\\n### Data Sources and Calculation\\n\\nHarvester uses the following data to calculate metrics for storage resources.\\n\\n- Sum of the `storageMaximum` values of all disks (`status.diskStatus.disk-name`): Total storage capacity\\n\\n- Total storage capacity - Sum of the `storageAvailable` values of all disks (`status.diskStatus.disk-name`): Data source for the **Used** field on the **Hosts** screen\\n\\n- Sum of the `storageReserved` values of all disks (`spec.disks`): Data source for the **Reserved** field on the **Hosts** screen\\n\\n```\\n# kubectl get nodes.longhorn.io -n longhorn-system -oyaml\\n\\napiVersion: v1\\nitems:\\n- apiVersion: longhorn.io/v1beta2\\n kind: Node\\n metadata:\\n..\\n name: harv41\\n namespace: longhorn-system\\n..\\n spec:\\n allowScheduling: true\\n disks:\\n default-disk-ef11a18c36b01132:\\n allowScheduling: true\\n diskType: filesystem\\n evictionRequested: false\\n path: /var/lib/harvester/defaultdisk\\n storageReserved: 24220101427\\n tags: []\\n..\\n status:\\n..\\n diskStatus:\\n default-disk-ef11a18c36b01132:\\n..\\n diskType: filesystem\\n diskUUID: d2788933-8817-44c6-b688-dee414cc1f73\\n scheduledReplica:\\n pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648\\n pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720\\n pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472\\n pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648\\n pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120\\n storageAvailable: 77699481600\\n storageMaximum: 80733671424\\n storageScheduled: 45097156608\\n region: \\"\\"\\n snapshotCheckStatus: {}\\n zone: 
\\"\\"\\n```"},{"id":"best_practices_for_optimizing_longhorn_disk_performance","metadata":{"permalink":"/kb/best_practices_for_optimizing_longhorn_disk_performance","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-12-27/best_practices_disk_performance.md","source":"@site/kb/2023-12-27/best_practices_disk_performance.md","title":"Best Practices for Optimizing Longhorn Disk Performance","description":"Follow the recommendations for achieving optimal disk performance.","date":"2023-12-27T00:00:00.000Z","formattedDate":"December 27, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"longhorn","permalink":"/kb/tags/longhorn"},{"label":"best practices","permalink":"/kb/tags/best-practices"},{"label":"disk performance","permalink":"/kb/tags/disk-performance"}],"readingTime":1.585,"truncated":false,"authors":[{"name":"David Ko","title":"Senior Software Engineering Manager","url":"https://github.com/innobead","image_url":"https://github.com/innobead.png","imageURL":"https://github.com/innobead.png"},{"name":"Jillian Maroket","title":"Technical Writer","url":"https://github.com/jillian-maroket/","image_url":"https://github.com/jillian-maroket.png","imageURL":"https://github.com/jillian-maroket.png"}],"frontMatter":{"title":"Best Practices for Optimizing Longhorn Disk Performance","description":"Follow the recommendations for achieving optimal disk performance.","slug":"best_practices_for_optimizing_longhorn_disk_performance","authors":[{"name":"David Ko","title":"Senior Software Engineering Manager","url":"https://github.com/innobead","image_url":"https://github.com/innobead.png","imageURL":"https://github.com/innobead.png"},{"name":"Jillian Maroket","title":"Technical Writer","url":"https://github.com/jillian-maroket/","image_url":"https://github.com/jillian-maroket.png","imageURL":"https://github.com/jillian-maroket.png"}],"tags":["harvester","longhorn","best practices","disk performance"],"hide_table_of_contents":false},"prevItem":{"title":"Calculation of Resource Metrics in Harvester","permalink":"/kb/calculation_of_resource_metrics_in_harvester"},"nextItem":{"title":"VM Live Migration Policy and Configuration","permalink":"/kb/vm_live_migration_policy_and_configuration"}},"content":"The Longhorn documentation provides [best practice recommendations](https://longhorn.io/docs/1.5.3/best-practices/) for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.\\n\\n- SATA/NVMe SSDs or disk drives with similar performance\\n- 10 Gbps network bandwidth between nodes\\n- Dedicated Priority Classes for system-managed and user-deployed Longhorn components\\n\\nThe following sections outline other recommendations for achieving optimal disk performance.\\n\\n## IO Performance\\n\\n- **Storage network**: Use a [dedicated storage network](https://docs.harvesterhci.io/v1.2/advanced/storagenetwork) to improve IO performance and stability. \\n\\n- **Longhorn disk**: Use a [dedicated disk](https://docs.harvesterhci.io/v1.2/host/#multi-disk-management) for Longhorn storage instead of using the root disk. \\n\\n- **Replica count**: Set the [default replica count](https://docs.harvesterhci.io/v1.2/advanced/storageclass#parameters-tab) to \\"2\\" to achieve data availability with better disk space usage or less impact to system performance. This practice is especially beneficial to data-intensive applications. 
\\n\\n- **Storage tag**: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either [add disks with tags](https://docs.harvesterhci.io/v1.2/host/#storage-tags) or [create StorageClasses with tags](https://docs.harvesterhci.io/v1.2/advanced/storageclass#disk-selector-optional). \\n\\n- **Data locality**: Use `best-effort` as the default [data locality](https://longhorn.io/docs/1.5.3/high-availability/data-locality/) of Longhorn Storage Classes. \\n\\n For applications that support data replication (for example, a distributed database), you can use the `strict-local` option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication. \\n\\n For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica. \\n\\n## Space Efficiency \\n\\n- **Recurring snapshots**: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation. \\n\\n For applications with replication capability, periodically [delete all types of snapshots](https://longhorn.io/docs/1.5.3/concepts/#243-deleting-snapshots).\\n\\n## Disaster Recovery\\n\\n- **Recurring backups**: Create [recurring backup jobs](https://longhorn.io/docs/1.5.3/volumes-and-nodes/trim-filesystem/) for mission-critical application volumes.\\n\\n- **System backup**: Run periodic system backups."},{"id":"vm_live_migration_policy_and_configuration","metadata":{"permalink":"/kb/vm_live_migration_policy_and_configuration","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-09-01/vm_live_migration_policy_and_configuration.md","source":"@site/kb/2023-09-01/vm_live_migration_policy_and_configuration.md","title":"VM Live Migration Policy and Configuration","description":"Know how VM live migration works, the migration policies, how to tune the policies and check status","date":"2023-09-01T00:00:00.000Z","formattedDate":"September 1, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"virtual machine","permalink":"/kb/tags/virtual-machine"},{"label":"VM","permalink":"/kb/tags/vm"},{"label":"live migration","permalink":"/kb/tags/live-migration"},{"label":"policy","permalink":"/kb/tags/policy"},{"label":"strategy","permalink":"/kb/tags/strategy"},{"label":"configuration","permalink":"/kb/tags/configuration"}],"readingTime":10.58,"truncated":false,"authors":[{"name":"Jian Wang","title":"Staff Software Engineer","url":"https://github.com/w13915984028","image_url":"https://github.com/w13915984028.png","imageURL":"https://github.com/w13915984028.png"}],"frontMatter":{"title":"VM Live Migration Policy and Configuration","description":"Know how VM live migration works, the migration policies, how to tune the policies and check status","slug":"vm_live_migration_policy_and_configuration","authors":[{"name":"Jian Wang","title":"Staff Software Engineer","url":"https://github.com/w13915984028","image_url":"https://github.com/w13915984028.png","imageURL":"https://github.com/w13915984028.png"}],"tags":["harvester","virtual machine","VM","live migration","policy","strategy","configuration"],"hide_table_of_contents":false},"prevItem":{"title":"Best Practices for Optimizing 
Longhorn Disk Performance","permalink":"/kb/best_practices_for_optimizing_longhorn_disk_performance"},"nextItem":{"title":"Use Rook Ceph External Storage with Harvester","permalink":"/kb/use_rook_ceph_external_storage"}},"content":"In Harvester, the **VM Live Migration** is well supported by the UI. Please refer to [Harvester VM Live Migration](https://docs.harvesterhci.io/v1.1/vm/live-migration) for more details.\\n\\nThe VM Live Migration process is finished smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.\\n\\nThis article dives into the VM Live Migration process in more detail. There are three main parts:\\n\\n- General Process of VM Live Migration\\n- VM Live Migration Strategies\\n- VM Live Migration Configurations\\n\\nRelated issues:\\n\\n- [Migration should show the proper status and progress in the UI](https://github.com/harvester/harvester/issues/4352)\\n- [VM Migration policy and status](https://github.com/harvester/harvester/issues/4376)\\n\\n:::note\\n\\nA big part of the following contents are copied from `kubevirt` document https://kubevirt.io/user-guide/operations/live_migration/, some contents/formats are adjusted to fit in this document.\\n\\n:::\\n\\n## General Process of VM Live Migration\\n\\n### Starting a Migration from Harvester UI\\n\\n1. Go to the **Virtual Machines** page.\\n1. Find the virtual machine that you want to migrate and select **\u22ee** > **Migrate**.\\n1. Choose the node to which you want to migrate the virtual machine and select **Apply**.\\n\\nAfter successfully selecting **Apply**, a CRD `VirtualMachineInstanceMigration` object is created, and the related `controller/operator` will start the process.\\n\\n### Migration CRD Object\\n\\nYou can also create the CRD `VirtualMachineInstanceMigration` object manually via `kubectl` or other tools.\\n\\nThe example below starts a migration process for a virtual machine instance (VMI) `new-vm`.\\n\\n```\\napiVersion: kubevirt.io/v1\\nkind: VirtualMachineInstanceMigration\\nmetadata:\\n name: migration-job\\nspec:\\n vmiName: new-vm\\n```\\n\\nUnder the hood, the open source projects `Kubevirt, Libvirt, QEMU, ... ` perform most of the `VM Live Migration`. [References.](#references)\\n\\n### Migration Status Reporting\\n\\nWhen starting a virtual machine instance (VMI), it has also been calculated whether the machine is live migratable. The result is being stored in the VMI `VMI.status.conditions`. The calculation can be based on multiple parameters of the VMI, however, at the moment, the calculation is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.\\n\\nThe reported Migration Method is also being calculated during VMI start. `BlockMigration` indicates that some of the VMI disks require copying from the source to the destination. `LiveMigration` means that only the instance memory will be copied.\\n\\n```\\nStatus:\\n Conditions:\\n Status: True\\n Type: LiveMigratable\\n Migration Method: BlockMigration\\n```\\n\\n### Migration Status\\n\\nThe migration progress status is reported in `VMI.status`. 
Most importantly, it indicates whether the migration has been completed or failed.\\n\\nBelow is an example of a successful migration.\\n\\n```\\nMigration State:\\n  Completed: true\\n  End Timestamp: 2019-03-29T03:37:52Z\\n  Migration Config:\\n    Completion Timeout Per GiB: 800\\n    Progress Timeout: 150\\n  Migration UID: c64d4898-51d3-11e9-b370-525500d15501\\n  Source Node: node02\\n  Start Timestamp: 2019-03-29T04:02:47Z\\n  Target Direct Migration Node Ports:\\n    35001: 0\\n    41068: 49152\\n    38284: 49153\\n  Target Node: node01\\n  Target Node Address: 10.128.0.46\\n  Target Node Domain Detected: true\\n  Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq\\n```\\n\\n## VM Live Migration Strategies\\n\\nVM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.\\n\\n### Understanding Different VM Live Migration Strategies\\n\\nVM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the case, however, the migration might get stuck without being able to progress.\\n\\nThe main factor that affects migrations from the guest perspective is its dirty rate, which is the rate at which the VM dirties memory. Guests with a high dirty rate cause a race during migration: on the one hand, memory is transferred continuously to the target, and on the other, the same memory is dirtied again by the guest. In such scenarios, consider using one of the more advanced migration strategies. Refer to [Understanding different migration strategies](https://kubevirt.io/user-guide/operations/live_migration/#understanding-different-migration-strategies) for more details.\\n\\nThere are 3 `VM Live Migration` strategies/policies:\\n\\n#### VM Live Migration Strategy: Pre-copy\\n\\nPre-copy is the default strategy. It should be used for most cases.\\n\\nIt works as follows:\\n1. The target VM is created, but the guest keeps running on the source VM.\\n1. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.\\n1. The guest starts executing on the target VM.\\n1. The source VM is removed.\\n\\nPre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.\\n\\nHowever, in some cases migrations might not converge easily; that is, by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (which is the VM the guest executes on). There are many reasons for a migration to fail to converge, such as a high dirty rate or low resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.\\n\\n#### VM Live Migration Strategy: Post-copy\\n\\nPost-copy migrations work as follows:\\n1. The target VM is created.\\n1. The guest is run on the target VM.\\n1. The source starts sending chunks of VM state (mostly memory) to the target.\\n1. When the guest, running on the target VM, accesses memory: if the memory already exists on the target VM, the guest can access it; otherwise, the target VM asks for a chunk of memory from the source VM.\\n1. Once all of the memory state is updated at the target VM, the source VM is removed.\\n\\nThe main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:\\n\\n**Advantages:**\\n\\n- The same memory chunk is never transferred twice. This is possible because, with post-copy, it doesn\'t matter that a page has been dirtied, since the guest is already running on the target VM.\\n- This means that a high dirty rate has much less effect.\\n- Consumes less network bandwidth.\\n\\n**Disadvantages:**\\n\\n- When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest\'s state, but some other parts of it may still be updated only at the source VM. This situation is generally dangerous because, for example, if either the target or the guest VM crashes, the state cannot be recovered.\\n- Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to fetch a lot of memory from the source VM in a short period of time.\\n- Slower than pre-copy in most cases.\\n- Harder to cancel a migration.\\n\\n#### VM Live Migration Strategy: Auto-converge\\n\\nAuto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.\\n\\nSince a high dirty rate is usually the most significant factor preventing migrations from converging, auto-converge simply throttles the guest\'s CPU. If the migration converges fast enough, the guest\'s CPU is not throttled, or is throttled only negligibly. But if the migration does not converge fast enough, the CPU is throttled more and more as time goes on.\\n\\nThis technique dramatically increases the probability of the migration converging eventually.\\n\\n### Observe the VM Live Migration Progress and Result\\n\\n#### Migration Timeouts\\n\\nDepending on the type, the live migration process copies virtual machine memory pages and disk blocks to the destination. During this process, non-locked pages and blocks are copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than they are copied.\\n\\n#### Completion Time\\n\\nIn some cases the virtual machine can write to different memory pages / disk blocks at a higher rate than they can be copied, which prevents the migration process from completing in a reasonable amount of time. In this case, live migration is aborted if it runs for a long period of time. The timeout is calculated based on the size of the VMI, its memory, and the ephemeral disks that need to be copied. The configurable parameter `completionTimeoutPerGiB`, which defaults to 800s, is the time to wait per GiB of data for the migration to be completed before aborting it. A VMI with 8GiB of memory will time out after 6400 seconds.\\n\\n#### Progress Timeout\\n\\nA VM Live Migration will also be aborted when it notices that copying memory doesn\'t make any progress. 
The time to wait for live migration to make progress in transferring data is configurable by the `progressTimeout` parameter, which defaults to 150 seconds.\\n\\n## VM Live Migration Configurations\\n\\n### Changing Cluster Wide Migration Limits\\n\\nKubeVirt puts some limits in place so that migrations don\'t overwhelm the cluster. By default, it is to only run 5 migrations in parallel with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.\\n\\nYou can change these values in the `kubevirt` CR:\\n```\\n apiVersion: kubevirt.io/v1\\n kind: Kubevirt\\n metadata:\\n name: kubevirt\\n namespace: kubevirt\\n spec:\\n configuration:\\n migrations:\\n parallelMigrationsPerCluster: 5\\n parallelOutboundMigrationsPerNode: 2\\n bandwidthPerMigration: 64Mi\\n completionTimeoutPerGiB: 800\\n progressTimeout: 150\\n disableTLS: false\\n nodeDrainTaintKey: \\"kubevirt.io/drain\\"\\n allowAutoConverge: false ---------------------\x3e related to: Auto-converge\\n allowPostCopy: false -------------------------\x3e related to: Post-copy\\n unsafeMigrationOverride: false\\n```\\n\\nRemember that most of these configurations can be overridden and fine-tuned to a specified group of VMs. For more information, please refer to the Migration Policies section below.\\n\\n### Migration Policies\\n\\n[Migration policies](https://kubevirt.io/user-guide/operations/migration_policies/) provides a new way of applying migration configurations to Virtual Machines. The policies can refine Kubevirt CR\'s `MigrationConfiguration` that sets the cluster-wide migration configurations. This way, the cluster-wide settings default how the migration policy can be refined (i.e., changed, removed, or added).\\n\\nRemember that migration policies are in version `v1alpha1`. This means that this API is not fully stable yet and that APIs may change in the future.\\n\\n#### Migration Configurations\\n\\nCurrently, the `MigrationPolicy` spec only includes the following configurations from Kubevirt CR\'s `MigrationConfiguration`. (In the future, more configurations that aren\'t part of Kubevirt CR will be added):\\n\\n```\\napiVersion: migrations.kubevirt.io/v1alpha1\\nkind: MigrationPolicy\\n spec:\\n allowAutoConverge: true\\n bandwidthPerMigration: 217Ki\\n completionTimeoutPerGiB: 23\\n allowPostCopy: false\\n```\\n\\nAll the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR\'s `MigrationConfiguration`. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any `MigrationPolicy` and VMs that are bound to a `MigrationPolicy` that does not define all fields of the configurations.\\n\\n##### Matching Policies to VMs\\n\\nNext in the spec are the selectors defining the group of VMs to apply the policy. 
The options to do so are the following.\\n\\nThis policy applies to the VMs in namespaces that have all the required labels:\\n\\n```\\napiVersion: migrations.kubevirt.io/v1alpha1\\nkind: MigrationPolicy\\n spec:\\n selectors:\\n namespaceSelector:\\n hpc-workloads: true # Matches a key and a value\\n```\\n\\nThe policy below applies to the VMs that have all the required labels:\\n\\n```\\napiVersion: migrations.kubevirt.io/v1alpha1\\nkind: MigrationPolicy\\n spec:\\n selectors:\\n virtualMachineInstanceSelector:\\n workload-type: db # Matches a key and a value\\n```\\n\\n## References\\n\\n### Documents\\n\\n### Libvirt Guest Migration\\n\\n`Libvirt` has a chapter to describe the pricipal of `VM/Guest Live Migration`.\\n\\nhttps://libvirt.org/migration.html\\n\\n### Kubevirt Live Migration\\n\\nhttps://kubevirt.io/user-guide/operations/live_migration/\\n\\n### Source Code\\n\\nThe `VM Live Migration` related configuration options are passed to each layer correspondingly.\\n\\n#### Kubevirt\\n\\nhttps://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103\\n\\n```\\n...\\nimport \\"libvirt.org/go/libvirt\\"\\n\\n...\\n\\nfunc generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {\\n...\\n\\tif options.AllowAutoConverge {\\n\\t\\tmigrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE\\n\\t}\\n\\tif options.AllowPostCopy {\\n\\t\\tmigrateFlags |= libvirt.MIGRATE_POSTCOPY\\n\\t}\\n...\\n}\\n```\\n\\n#### Go Package Libvirt\\n\\nhttps://pkg.go.dev/libvirt.org/go/libvirt\\n\\n```\\nconst (\\n...\\n\\tMIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)\\n\\tMIGRATE_RDMA_PIN_ALL = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)\\n\\tMIGRATE_POSTCOPY = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)\\n...\\n)\\n```\\n\\n#### Libvirt\\n\\nhttps://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030\\n\\n```\\n /* Enable algorithms that ensure a live migration will eventually converge.\\n * This usually means the domain will be slowed down to make sure it does\\n * not change its memory faster than a hypervisor can transfer the changed\\n * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*\\n * parameters can be used to tune the algorithm.\\n *\\n * Since: 1.2.3\\n */\\n VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),\\n...\\n /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy\\n * migration. However, the migration will start normally and\\n * virDomainMigrateStartPostCopy needs to be called to switch it into the\\n * post-copy mode. 
See virDomainMigrateStartPostCopy for more details.\\n *\\n * Since: 1.3.3\\n */\\n VIR_MIGRATE_POSTCOPY = (1 << 15),\\n```"},{"id":"use_rook_ceph_external_storage","metadata":{"permalink":"/kb/use_rook_ceph_external_storage","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-08-23/using_rook_ceph_storage.md","source":"@site/kb/2023-08-23/using_rook_ceph_storage.md","title":"Use Rook Ceph External Storage with Harvester","description":"Use Rook Ceph External Storage with Harvester","date":"2023-08-23T00:00:00.000Z","formattedDate":"August 23, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"rook","permalink":"/kb/tags/rook"},{"label":"ceph","permalink":"/kb/tags/ceph"},{"label":"csi","permalink":"/kb/tags/csi"}],"readingTime":3.86,"truncated":false,"authors":[{"name":"Hang Yu","title":"Staff Software Engineer","url":"https://github.com/futuretea","image_url":"https://github.com/futuretea.png","imageURL":"https://github.com/futuretea.png"}],"frontMatter":{"title":"Use Rook Ceph External Storage with Harvester","description":"Use Rook Ceph External Storage with Harvester","slug":"use_rook_ceph_external_storage","authors":[{"name":"Hang Yu","title":"Staff Software Engineer","url":"https://github.com/futuretea","image_url":"https://github.com/futuretea.png","imageURL":"https://github.com/futuretea.png"}],"tags":["harvester","rook","ceph","csi"],"hide_table_of_contents":false},"prevItem":{"title":"VM Live Migration Policy and Configuration","permalink":"/kb/vm_live_migration_policy_and_configuration"},"nextItem":{"title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","permalink":"/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility"}},"content":"Starting with Harvester v1.2.0, it offers the capability to install a Container Storage Interface (CSI) in your Harvester cluster. This allows you to leverage external storage for the Virtual Machine\'s non-system data disk, giving you the flexibility to use different drivers tailored for specific needs, whether it\'s for performance optimization or seamless integration with your existing in-house storage solutions.\\n\\nIt\'s important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.\\n\\nOne of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.\\n\\n[Ceph](https://ceph.io) is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.\\n\\n[Rook](https://rook.io) simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. 
It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.\\n\\nIn this article, we will walk you through the process of installing, configuring, and utilizing [Rook](https://rook.io/docs/rook/v1.12/Getting-Started/intro/) to use storage from an [existing external Ceph cluster](https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/) as a data disk for a VM within the Harvester environment.\\n\\n## Install Harvester Cluster\\n\\nHarvester\'s operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph\'s requirements, you need to add specific persistent paths to the `os.persistentStatePaths` section in the [Harvester configuration](https://docs.harvesterhci.io/dev/install/harvester-configuration#ospersistent_state_paths). These paths include:\\n\\n```yaml\\nos:\\n persistent_state_paths:\\n - /var/lib/rook\\n - /var/lib/ceph\\n modules:\\n - rbd\\n - nbd\\n```\\n\\nAfter the cluster is installed, refer to [How can I access the kubeconfig file of the Harvester cluster?](https://docs.harvesterhci.io/v1.1/faq#how-can-i-access-the-kubeconfig-file-of-the-harvester-cluster) to get the kubeconfig of the Harvester cluster.\\n\\n## Install Rook to Harvester\\n\\nInstall Rook to the Harvester cluster by referring to [Rook Quickstart](https://rook.io/docs/rook/v1.12/Getting-Started/quickstart/).\\n\\n```bash\\ncurl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \\\\\\n && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples\\n# apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/\\nkubectl apply -f crds.yaml -f common.yaml -f operator.yaml\\nkubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m\\n```\\n\\n## Using an existing external Ceph cluster\\n\\n1. Run the python script `create-external-cluster-resources.py` in the [existing external Ceph cluster](https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/) for creating all users and keys.\\n```bash\\n# script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys\\ncurl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py\\npython3 create-external-cluster-resources.py --rbd-data-pool-name --namespace rook-ceph-external --format bash\\n```\\n\\n2. Copy the Bash output.\\n\\nExample output:\\n```\\nexport NAMESPACE=rook-ceph-external\\nexport ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805\\nexport ROOK_EXTERNAL_USERNAME=client.healthchecker\\nexport ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789\\nexport ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==\\nexport ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/\\nexport CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==\\nexport CSI_RBD_NODE_SECRET_NAME=csi-rbd-node\\nexport CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==\\nexport CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner\\nexport MONITORING_ENDPOINT=192.168.5.99\\nexport MONITORING_ENDPOINT_PORT=9283\\nexport RBD_POOL_NAME=test\\nexport RGW_POOL_PREFIX=default\\n```\\n\\n3. 
Consume the external Ceph cluster resources on the Harvester cluster.\\n\\n```bash\\n# Paste the above output from create-external-cluster-resources.py into import-env.sh\\nvim import-env.sh\\nsource import-env.sh\\n# this script will create a StorageClass ceph-rbd\\nsource import-external-cluster.sh\\n```\\n\\n```bash\\nkubectl apply -f common-external.yaml\\nkubectl apply -f cluster-external.yaml\\n# wait for all pods to become Ready\\nwatch \'kubectl --namespace rook-ceph get pods\'\\n```\\n\\n4. Create the VolumeSnapshotClass `csi-rbdplugin-snapclass-external`.\\n\\n```bash\\ncat >./csi/rbd/snapshotclass-external.yaml < **Settings**.\\n1. Find and select **csi-driver-config**, and then click on the **\u22ee** > **Edit Setting** to access the configuration options.\\n1. In the settings, set the **Provisioner** to `rook-ceph.rbd.csi.ceph.com`.\\n2. Next, specify the **Volume Snapshot Class Name** as `csi-rbdplugin-snapclass-external`. This setting points to the name of the `VolumeSnapshotClass` used for creating volume snapshots or VM snapshots.\\n3. Similarly, set the **Backup Volume Snapshot Class Name** to `csi-rbdplugin-snapclass-external`. This corresponds to the name of the `VolumeSnapshotClass` responsible for creating VM backups.\\n\\n![csi-driver-config-external](./imgs/csi-driver-config-external.png)\\n\\n## Use Rook Ceph in Harvester\\n\\nAfter successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named `rook-ceph-block` for the internal Ceph cluster or named `ceph-rbd` for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster\'s storage capabilities.\\n\\nWith these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.\\n\\n![rook-ceph-volume-external](./imgs/rook-ceph-volume-external.png)\\n\\n![rook-ceph-vm-external](./imgs/rook-ceph-vm-external.png)"},{"id":"upgrading_guest_clusters_with_harvester_ip_pool_compatibility","metadata":{"permalink":"/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-08-21/compatible_with_ip_pool_new_feature.md","source":"@site/kb/2023-08-21/compatible_with_ip_pool_new_feature.md","title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","description":"Explain how to keep load balancer IP during upgrading guest cluster","date":"2023-08-21T00:00:00.000Z","formattedDate":"August 21, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"load balancer","permalink":"/kb/tags/load-balancer"},{"label":"cloud provider","permalink":"/kb/tags/cloud-provider"},{"label":"ip pool","permalink":"/kb/tags/ip-pool"},{"label":"upgrade","permalink":"/kb/tags/upgrade"}],"readingTime":2.675,"truncated":false,"authors":[{"name":"Canwu Yao","title":"Software Engineer","url":"https://github.com/yaocw2020","image_url":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4","imageURL":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4"}],"frontMatter":{"title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","description":"Explain how to keep load balancer IP during upgrading guest cluster","slug":"upgrading_guest_clusters_with_harvester_ip_pool_compatibility","authors":[{"name":"Canwu Yao","title":"Software 
Engineer","url":"https://github.com/yaocw2020","image_url":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4","imageURL":"https://avatars.githubusercontent.com/u/7421463?s=400&v=4"}],"tags":["harvester","load balancer","cloud provider","ip pool","upgrade"],"hide_table_of_contents":false},"prevItem":{"title":"Use Rook Ceph External Storage with Harvester","permalink":"/kb/use_rook_ceph_external_storage"},"nextItem":{"title":"Using NetApp Storage on Harvester","permalink":"/kb/install_netapp_trident_csi"}},"content":"As **Harvester v1.2.0** is released, a new Harvester cloud provider version **0.2.2** is integrated into RKE2 **v1.24.15+rke2r1**, **v1.25.11+rke2r1**, **v1.26.6+rke2r1**, **v1.27.3+rke2r1**, and newer versions.\\n\\nWith Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:\\n\\n![image](ippoolforcluster2.png)\\n\\nHowever, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to [issue 4232](https://github.com/harvester/harvester/issues/4232) for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs. \\n\\nThis article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.\\n\\n## Prerequisites\\n\\n- Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to [Access Harvester Cluster](https://docs.harvesterhci.io/v1.1/faq#how-can-i-access-the-kubeconfig-file-of-the-harvester-cluster) to get the desired one.\\n\\n- Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to [Accessing Clusters with kubectl from Your Workstation](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/manage-clusters/access-clusters/use-kubectl-and-kubeconfig#accessing-clusters-with-kubectl-from-your-workstation) for instructions on how to download the kubeconfig file.\\n\\n## Steps to Keep Load Balancer IP\\n\\n1. Execute the following script before upgrading.\\n ```\\n curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade \\n ```\\n\\n - ``: Path to the Harvester kubeconfig file.\\n - ``: Path to the kubeconfig file of your guest Kubernetes cluster.\\n - ``: Name of your guest cluster.\\n - ``: Namespace where the VMs of the guest cluster are located.\\n\\n The script will help users copy the DHCP information to the service annotation and modify the IP pool allocated history to make sure the IP is unchanged.\\n\\n ![image](before-upgrade.png)\\n\\n After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. 
For example:\\n\\n ``` yaml\\n apiVersion: v1\\n kind: Service\\n metadata:\\n annotations:\\n kube-vip.io/hwaddr: 00:00:6c:4f:18:68\\n kube-vip.io/requestedIP: 172.19.105.215\\n name: lb0\\n namespace: default\\n ```\\n\\n As for the load balancer service with pool mode, the IP pool allocated history will be modified as the new load balancer name. For example:\\n\\n ``` yaml\\n apiVersion: loadbalancer.harvesterhci.io/v1beta1\\n kind: IPPool\\n metadata:\\n name: default\\n spec:\\n ...\\n status:\\n allocatedHistory:\\n 192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replace the new load balancer name\\n ```\\n\\n2. Add network selector for the pool.\\n\\n For example, the following cluster is under the VM network `default/mgmt-untagged`. The network selector should be `default/mgmt-untagged`.\\n\\n ![image](network.png)\\n\\n ![image](network-selector.png)\\n\\n3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.\\n \\n ![image](upgrade.png)\\n\\n1. Execute the script after upgrading.\\n ```\\n curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade \\n ```\\n ![image](before-upgrade.png)\\n \\n In this step, the script wraps the operations to upgrade the Harvester cloud provider to set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers will be created with the unchanged IPs."},{"id":"install_netapp_trident_csi","metadata":{"permalink":"/kb/install_netapp_trident_csi","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-08-11/using_netapp_third_party_storage.md","source":"@site/kb/2023-08-11/using_netapp_third_party_storage.md","title":"Using NetApp Storage on Harvester","description":"Installation procedure for NetApp Astra Trident CSI Driver","date":"2023-08-11T00:00:00.000Z","formattedDate":"August 11, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"}],"readingTime":6.08,"truncated":false,"authors":[{"name":"Jeff Radick","title":"Staff Software Engineer"}],"frontMatter":{"title":"Using NetApp Storage on Harvester","description":"Installation procedure for NetApp Astra Trident CSI Driver","slug":"install_netapp_trident_csi","authors":[{"name":"Jeff Radick","title":"Staff Software Engineer"}],"tags":["harvester"],"hide_table_of_contents":false},"prevItem":{"title":"Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools","permalink":"/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility"},"nextItem":{"title":"Configure PriorityClass on Longhorn System Components","permalink":"/kb/configure_priority_class_longhorn"}},"content":"This article covers instructions for installing the Netapp Astra Trident CSI driver into a Harvester cluster, which enables NetApp storage systems to store storage volumes usable by virtual machines running in Harvester.\\n\\nThe NetApp storage will be an option in addition to the normal Longhorn storage; it will not replace Longhorn. 
Virtual machine images will still be stored using Longhorn.\\n\\nThis has been tested with Harvester 1.2.0 and Trident v23.07.0.\\n\\nThis procedure only works to access storage via iSCSI, not NFS.\\n\\n:::note\\n3rd party storage classes (including those based on Trident) can only be used for non-boot volumes of Harvester VMs.\\n:::\\n\\n# Detailed Instructions\\n\\nWe assume that before beginning this procedure, a Harvester cluster and a NetApp ONTAP storage system are both installed and configured for use.\\n\\nMost of these steps can be performed on any system with the `helm` and `kubectl` commands installed and network connectivity to the management port of the Harvester cluster. Let\'s call this your workstation. Certain steps must be performed on one or more cluster nodes themselves. The steps described below should be done on your workstation unless otherwise indicated.\\n\\nThe last step (enabling multipathd) should be done on all nodes after the Trident CSI has been installed.\\n\\nCertain parameters of your installation will require modification of details in the examples in the procedure given below. Those which you may wish to modify include:\\n\\n* The namespace. `trident` is used as the namespace in the examples, but you may prefer to use another.\\n* The name of the deployment. `mytrident` is used but you can change this to something else.\\n* The management IP address of the ONTAP storage system\\n* Login credentials (username and password) of the ONTAP storage system\\n\\nThe procedure is as follows.\\n\\n1. Read the NetApp Astra Trident documentation:\\n\\n * https://docs.netapp.com/us-en/trident/\\n * https://docs.netapp.com/us-en/trident/trident-get-started/kubernetes-deploy-operator.html\\n * https://docs.netapp.com/us-en/trident/trident-get-started/kubernetes-deploy-helm.html#deploy-the-trident-operator-and-install-astra-trident-using-helm\\n\\n The simplest method is to install using Helm; that process is described here.\\n\\n1. Download the KubeConfig from the Harvester cluster.\\n\\n * Open the web UI for your Harvester cluster\\n * In the lower left corner, click the \\"Support\\" link. This will take you to a \\"Harvester Support\\" page.\\n * Click the button labeled \\"Download KubeConfig\\". This will download a your cluster config in a file called \\"local.yaml\\" by default.\\n * Move this file to a convenient location and set your `KUBECONFIG` environment variable to the path of this file.\\n\\n1. Prepare the cluster for installation of the Helm chart.\\n\\n Before starting installation of the helm chart, special authorization must be provided to enable certain modifications to be made during the installation.\\n This addresses the issue described here: https://github.com/NetApp/trident/issues/839\\n\\n * Put the following text into a file. For this example we\'ll call it `authorize_trident.yaml`.\\n\\n ```yaml\\n ---\\n apiVersion: rbac.authorization.k8s.io/v1\\n kind: ClusterRole\\n metadata:\\n name: trident-operator-psa\\n rules:\\n - apiGroups:\\n - management.cattle.io\\n resources:\\n - projects\\n verbs:\\n - updatepsa\\n ---\\n apiVersion: rbac.authorization.k8s.io/v1\\n kind: ClusterRoleBinding\\n metadata:\\n name: trident-operator-psa\\n roleRef:\\n apiGroup: rbac.authorization.k8s.io\\n kind: ClusterRole\\n name: trident-operator-psa\\n subjects:\\n - kind: ServiceAccount\\n name: trident-operator\\n namespace: trident\\n ```\\n\\n * Apply this manifest via the command `kubectl apply -f authorize_trident.yaml`.\\n\\n1. 
Install the helm chart.\\n\\n * First you will need to add the Astra Trident Helm repository:\\n\\n ```shell\\n helm repo add netapp-trident https://netapp.github.io/trident-helm-chart\\n ```\\n\\n * Next, install the Helm chart. This example uses `mytrident` as the deployment name, `trident` as the namespace, and 23.07.0 as the version number to install:\\n\\n ```shell\\n helm install mytrident netapp-trident/trident-operator --version 23.07.0 --create-namespace --namespace trident\\n ```\\n\\n * The NetApp documentation describes variations on how you can do this.\\n\\n1. Download and extract the tridentctl command, which will be needed for the next few steps.\\n\\n This and the next few steps need to be performed logged into a master node of the Harvester cluster, using root access.\\n\\n ```shell\\n cd /tmp\\n curl -L -o trident-installer-23.07.0.tar.gz https://github.com/NetApp/trident/releases/download/v23.07.0/trident-installer-23.07.0.tar.gz\\n tar -xf trident-installer-23.07.0.tar.gz\\n cd trident-installer\\n ```\\n\\n1. Install a backend.\\n\\n This part is specific to Harvester.\\n\\n 1. Put the following into a text file, for example /tmp/backend.yaml\\n\\n ```yaml\\n version: 1\\n backendName: default_backend_san\\n storageDriverName: ontap-san-economy\\n managementLIF: 172.19.97.114\\n svm: default_backend\\n username: admin\\n password: password1234\\n labels:\\n name: default_backend_san\\n ```\\n\\n The LIF IP address, username, and password of this file\\n should be replaced with the management LIF and credentials\\n for the ONTAP system.\\n\\n 1. Create the backend\\n\\n ```shell\\n ./tridentctl create backend -f /tmp/backend.yaml -n trident\\n ```\\n\\n 1. Check that it is created\\n\\n ```shell\\n ./tridentctl get backend -n trident\\n ```\\n\\n1. Define a StorageClass and SnapshotClass.\\n\\n 1. Put the following into a file, for example `/tmp/storage.yaml`\\n\\n ```yaml\\n ---\\n apiVersion: storage.k8s.io/v1\\n kind: StorageClass\\n metadata:\\n name: ontap-san-economy\\n provisioner: csi.trident.netapp.io\\n parameters:\\n selector: \\"name=default_backend_san\\"\\n ---\\n apiVersion: snapshot.storage.k8s.io/v1\\n kind: VolumeSnapshotClass\\n metadata:\\n name: csi-snapclass\\n driver: csi.trident.netapp.io\\n deletionPolicy: Delete\\n ```\\n\\n 1. Apply the definitions:\\n\\n ```shell\\n kubectl apply -f /tmp/storage.yaml\\n ```\\n\\n1. Enable multipathd\\n\\n The following is required to enable multipathd.\\n This must be done on every node of the Harvester cluster, using root access.\\n The preceding steps should only be done once on a single node.\\n\\n 1. Create this file in `/oem/99_multipathd.yaml`:\\n\\n ```yaml\\n stages:\\n default:\\n - name: \\"Setup multipathd\\"\\n systemctl:\\n enable:\\n - multipathd\\n start:\\n - multipathd\\n ```\\n\\n 1. Configure `multipathd` to exclude pathnames used by Longhorn.\\n\\n This part is a little tricky. `multipathd` will automatically discover\\n device names matching a certain pattern, and attempt to set up multipathing on them.\\n Unfortunately, Longhorn\'s device names follow the same pattern, and\\n will not work correctly if `multipathd` tries to use those devices.\\n\\n Therefore the file `/etc/multipath.conf` must be set up on each node\\n so as to prevent `multipathd` from touching any of the devices\\n that Longhorn will use. 
Unfortunately, it is not possible to know\\n in advance which device names will be used until the volumes are attached\\n to a VM when the VM is started, or when the volumes are hot-added to a running VM.\\n The recommended method is to \\"whitelist\\" the Trident devices using device\\n properties rather than device naming. The properties to allow are the\\n device vendor and product. Here is an example of what you\'ll want in `/etc/multipath.conf`:\\n\\n ```text\\n blacklist {\\n device {\\n vendor \\"!NETAPP\\"\\n product \\"!LUN\\"\\n }\\n }\\n blacklist_exceptions {\\n device {\\n vendor \\"NETAPP\\"\\n product \\"LUN\\"\\n }\\n }\\n ```\\n\\n This example only works if NetApp is the only storage provider in the system for which `multipathd` must be used. More complex environments will require more complex configuration.\\n\\n Explicitly putting that content into `/etc/multipath.conf` will work when you start `multipathd` as described below, but the change in `/etc` will not persist across node reboots. To solve that problem, you should add another file to `/oem` that will re-generate `/etc/multipath.conf` when the node reboots. The following example will create the `/etc/multipath.conf` given in the example above, but may need to be modified for your environment if you have a more complex iSCSI configuration:\\n\\n ```text\\n stages:\\n initramfs:\\n - name: \\"Configure multipath blacklist and whitelist\\"\\n files:\\n - path: /etc/multipath.conf\\n permissions: 0644\\n owner: 0\\n group: 0\\n content: |\\n blacklist {\\n device {\\n vendor \\"!NETAPP\\"\\n product \\"!LUN\\"\\n }\\n }\\n blacklist_exceptions {\\n device {\\n vendor \\"NETAPP\\"\\n product \\"LUN\\"\\n }\\n }\\n ```\\n\\n Remember, this has to be done on every node.\\n\\n 1. 
Enable multipathd.\\n\\n Adding the above files to `/oem` will take effect on the next reboot of the node; `multipathd` can be enabled immediately without rebooting the node using the following commands:\\n\\n ```shell\\n systemctl enable multipathd\\n systemctl start multipathd\\n ```\\n\\n After the above steps, the `ontap-san-economy` storage class should be available when creating a volume for a Harvester VM."},{"id":"configure_priority_class_longhorn","metadata":{"permalink":"/kb/configure_priority_class_longhorn","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-07-25/configure_priority_class_longhorn.md","source":"@site/kb/2023-07-25/configure_priority_class_longhorn.md","title":"Configure PriorityClass on Longhorn System Components","description":"Configure priority classes on Longhorn system components","date":"2023-07-25T00:00:00.000Z","formattedDate":"July 25, 2023","tags":[{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"longhorn","permalink":"/kb/tags/longhorn"},{"label":"priority class","permalink":"/kb/tags/priority-class"}],"readingTime":6.405,"truncated":false,"authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"frontMatter":{"title":"Configure PriorityClass on Longhorn System Components","description":"Configure priority classes on Longhorn system components","slug":"configure_priority_class_longhorn","authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"tags":["harvester","longhorn","priority class"],"hide_table_of_contents":false},"prevItem":{"title":"Using NetApp Storage on Harvester","permalink":"/kb/install_netapp_trident_csi"},"nextItem":{"title":"Package your own Toolbox Image","permalink":"/kb/package_your_own_toolbox_image"}},"content":"**Harvester v1.2.0** introduces a new enhancement where Longhorn system-managed components in newly-deployed clusters are automatically assigned a `system-cluster-critical` priority class by default. However, when upgrading your Harvester clusters from previous versions, you may notice that Longhorn system-managed components do not have any priority class set.\\n\\nThis behavior is intentional and aimed at supporting zero-downtime upgrades. Longhorn does not allow changing the `priority-class` setting when attached volumes exist. For more details, please refer to [Setting Priority Class During Longhorn Installation](https://longhorn.io/docs/1.4.3/advanced-resources/deploy/priority-class/#setting-priority-class-during-longhorn-installation)).\\n\\nThis article explains how to manually configure priority classes for Longhorn system-managed components after upgrading your Harvester cluster, ensuring that your Longhorn components have the appropriate priority class assigned and maintaining the stability and performance of your system.\\n\\n## Stop all virtual machines\\n\\nStop all virtual machines (VMs) to detach all volumes. Please back up any work before doing this.\\n1. [Login to a Harvester controller node and become root](https://docs.harvesterhci.io/v1.1/troubleshooting/os#how-to-log-into-a-harvester-node).\\n2. 
Get all running VMs and write down their namespaces and names:\\n\\n ```bash\\n kubectl get vmi -A\\n ```\\n\\n Alternatively, you can get this information by backing up the Virtual Machine Instance (VMI) manifests with the following command:\\n ```bash\\n kubectl get vmi -A -o json > vmi-backup.json\\n ```\\n\\n3. Shut down all VMs. Log in to all running VMs and shut them down gracefully (recommended). Or use the following command to send shutdown signals to all VMs:\\n ```bash\\n kubectl get vmi -A -o json | jq -r \'.items[] | [.metadata.name, .metadata.namespace] | @tsv\' | while IFS=$\'\\\\t\' read -r name namespace; do\\n if [ -z \\"$name\\" ]; then\\n break\\n fi\\n echo \\"Stop ${namespace}/${name}\\"\\n virtctl stop $name -n $namespace\\n done\\n ```\\n\\n :::note\\n You can also stop all VMs from the Harvester UI:\\n 1. Go to the **Virtual Machines** page.\\n 2. For each VM, select **\u22ee** > **Stop**.\\n :::\\n\\n4. Ensure there are no running VMs:\\n\\n Run the command:\\n\\n ```bash\\n kubectl get vmi -A\\n ```\\n\\n The above command must return:\\n\\n ```bash\\n No resources found\\n\\n## Scale down monitoring pods\\n\\n1. Scale down the Prometheus deployment. Run the following command and wait for all Prometheus pods to terminate:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch \'{\\"spec\\": {\\"replicas\\": 0}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched\\n statefulset rolling update complete 0 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...\\n ```\\n\\n2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to terminate:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch \'{\\"spec\\": {\\"replicas\\": 0}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched\\n statefulset rolling update complete 0 pods at revision alertmanager-rancher-monitoring-alertmanager-c8c459dff...\\n ```\\n\\n3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to terminate:\\n\\n ```bash\\n kubectl scale --replicas=0 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/rancher-monitoring-grafana scaled\\n deployment \\"rancher-monitoring-grafana\\" successfully rolled out\\n ```\\n\\n## Scale down vm-import-controller pods\\n\\n1. Check if the [`vm-import-controller`](https://docs.harvesterhci.io/v1.1/advanced/vmimport) addon is enabled and configured with a persistent volume with the following command:\\n\\n ```bash\\n kubectl get pvc -n harvester-system harvester-vm-import-controller\\n ```\\n\\n If the above command returns an output like this, you must scale down the `vm-import-controller` pod. 
Otherwise, you can skip the following step.\\n ```\\n NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE\\n harvester-vm-import-controller Bound pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559 200Gi RWO harvester-longhorn 2m53s\\n ```\\n\\n2. Scale down the `vm-import-controller` pods with the following command:\\n\\n ```bash\\n kubectl scale --replicas=0 deployment/harvester-vm-import-controller -n harvester-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/harvester-vm-import-controller scaled\\n deployment \\"harvester-vm-import-controller\\" successfully rolled out\\n ```\\n\\n## Set the `priority-class` setting\\n\\n1. Before applying the `priority-class` setting, you need to verify all volumes are detached. Run the following command to verify the `STATE` of each volume is `detached`:\\n\\n ```bash\\n kubectl get volumes.longhorn.io -A\\n ```\\n\\n Verify the output looks like this:\\n ```\\n NAMESPACE NAME STATE ROBUSTNESS SCHEDULED SIZE NODE AGE\\n longhorn-system pvc-5743fd02-17a3-4403-b0d3-0e9b401cceed detached unknown 5368709120 15d\\n longhorn-system pvc-7e389fe8-984c-4049-9ba8-5b797cb17278 detached unknown 53687091200 15d\\n longhorn-system pvc-8df64e54-ecdb-4d4e-8bab-28d81e316b8b detached unknown 2147483648 15d\\n longhorn-system pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559 detached unknown 214748364800 11m\\n ```\\n\\n1. Set the `priority-class` setting with the following command:\\n\\n ```bash\\n kubectl patch -n longhorn-system settings.longhorn.io priority-class --patch \'{\\"value\\": \\"system-cluster-critical\\"}\' --type merge\\n ```\\n\\n Longhorn system-managed pods will restart and then you need to check if all the system-managed components have a priority class set:\\n\\n Get the value of the priority class `system-cluster-critical`:\\n ```bash\\n kubectl get priorityclass system-cluster-critical\\n ```\\n\\n Verify the output looks like this:\\n ```\\n NAME VALUE GLOBAL-DEFAULT AGE\\n system-cluster-critical 2000000000 false 15d\\n ```\\n\\n3. Use the following command to get pods\' priority in the `longhorn-system` namespace:\\n\\n ```bash\\n kubectl get pods -n longhorn-system -o custom-columns=\\"Name\\":metadata.name,\\"Priority\\":.spec.priority\\n ```\\n\\n4. Verify all system-managed components\' pods have the correct priority. System-managed components include:\\n\\n - `csi-attacher`\\n - `csi-provisioner`\\n - `csi-resizer`\\n - `csi-snapshotter`\\n - `engine-image-ei`\\n - `instance-manager-e`\\n - `instance-manager-r`\\n - `longhorn-csi-plugin`\\n\\n## Scale up vm-import-controller pods\\n\\nIf you scale down the `vm-import-controller` pods, you must scale it up again. \\n\\n1. Scale up the `vm-import-controller` pod. Run the command: \\n\\n ```bash\\n kubectl scale --replicas=1 deployment/harvester-vm-import-controller -n harvester-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/harvester-vm-import-controller scaled\\n Waiting for deployment \\"harvester-vm-import-controller\\" rollout to finish: 0 of 1 updated replicas are available...\\n deployment \\"harvester-vm-import-controller\\" successfully rolled out\\n ```\\n\\n2. 
Verify `vm-import-controller` is running using the following command:\\n ```bash\\n kubectl get pods --selector app.kubernetes.io/instance=vm-import-controller -A\\n ```\\n\\n A sample output looks like this, the pod\'s `STATUS` must be `Running`:\\n ```\\n NAMESPACE NAME READY STATUS RESTARTS AGE\\n harvester-system harvester-vm-import-controller-6bd8f44f55-m9k86 1/1 Running 0 4m53s\\n ```\\n\\n## Scale up monitoring pods\\n\\n1. Scale up the Prometheus deployment. Run the following command and wait for all Prometheus pods to roll out:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch \'{\\"spec\\": {\\"replicas\\": 1}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus\\n ```\\n\\n A sample output looks like:\\n ```\\n prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched\\n Waiting for 1 pods to be ready...\\n statefulset rolling update complete 1 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...\\n ```\\n\\n2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to roll out:\\n\\n ```bash\\n kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch \'{\\"spec\\": {\\"replicas\\": 1}}\' --type merge && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched\\n Waiting for 1 pods to be ready...\\n statefulset rolling update complete 1 pods at revision alertmanager-rancher-monitoring-alertmanager-c8bd4466c...\\n ```\\n\\n3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to roll out:\\n\\n ```bash\\n kubectl scale --replicas=1 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \\\\\\n sleep 5 && \\\\\\n kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana\\n ```\\n\\n A sample output looks like this:\\n\\n ```\\n deployment.apps/rancher-monitoring-grafana scaled\\n Waiting for deployment \\"rancher-monitoring-grafana\\" rollout to finish: 0 of 1 updated replicas are available...\\n deployment \\"rancher-monitoring-grafana\\" successfully rolled out\\n ```\\n\\n## Start virtual machines\\n\\n1. Start a VM with the command:\\n\\n ```bash\\n virtctl start $name -n $namespace\\n ```\\n\\n Replace `$name` with the VM\'s name and `$namespace` with the VM\'s namespace. You can list all virtual machines with the command:\\n\\n ```bash\\n kubectl get vms -A\\n ```\\n\\n :::note\\n You can also stop all VMs from the Harvester UI:\\n 1. Go to the **Virtual Machines** page.\\n 2. 
For each VM, select **\u22ee** > **Start**.\\n :::\\n\\n Alternatively, you can start all running VMs with the following command:\\n\\n ```bash\\n cat vmi-backup.json | jq -r \'.items[] | [.metadata.name, .metadata.namespace] | @tsv\' | while IFS=$\'\\\\t\' read -r name namespace; do\\n if [ -z \\"$name\\" ]; then\\n break\\n fi\\n echo \\"Start ${namespace}/${name}\\"\\n virtctl start $name -n $namespace || true\\n done\\n ```"},{"id":"package_your_own_toolbox_image","metadata":{"permalink":"/kb/package_your_own_toolbox_image","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-07-06/package_your_own_toolbox_image.md","source":"@site/kb/2023-07-06/package_your_own_toolbox_image.md","title":"Package your own Toolbox Image","description":"How to package your own toolbox image","date":"2023-07-06T00:00:00.000Z","formattedDate":"July 6, 2023","tags":[{"label":"debug","permalink":"/kb/tags/debug"},{"label":"harvester","permalink":"/kb/tags/harvester"},{"label":"container","permalink":"/kb/tags/container"}],"readingTime":1.655,"truncated":false,"authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"frontMatter":{"title":"Package your own Toolbox Image","description":"How to package your own toolbox image","slug":"package_your_own_toolbox_image","authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"tags":["debug","harvester","container"],"hide_table_of_contents":false},"prevItem":{"title":"Configure PriorityClass on Longhorn System Components","permalink":"/kb/configure_priority_class_longhorn"},"nextItem":{"title":"Scan and Repair Root Filesystem of VirtualMachine","permalink":"/kb/scan-and-repair-vm-root-filesystem"}},"content":"Harvester OS is designed as an immutable operating system, which means you cannot directly install additional packages on it. While there is a way to [install packages](https://docs.harvesterhci.io/dev/troubleshooting/os#how-can-i-install-packages-why-are-some-paths-read-only), it is strongly advised against doing so, as it may lead to system instability.\\n\\nIf you only want to debug with the system, the preferred way is to package the toolbox image with all the needed packages. \\n\\nThis article shares how to package your toolbox image and how to install any packages on the toolbox image that help you debug the system.\\n\\nFor example, if you want to analyze a storage performance issue, you can install `blktrace` on the toolbox image.\\n\\n\\n## Create a Dockerfile\\n\\n```bash\\nFROM opensuse/leap:15.4\\n\\n# Install blktrace\\nRUN zypper in -y \\\\\\n blktrace\\n\\nRUN zypper clean --all\\n```\\n\\n## Build the image and push\\n```bash\\n# assume you are in the directory of Dockerfile\\n$ docker build -t harvester/toolbox:dev .\\n.\\n.\\n.\\nnaming to docker.io/harvester/toolbox:dev ...\\n$ docker push harvester/toolbox:dev\\n.\\n.\\nd4b76d0683d4: Pushed \\na605baa225e2: Pushed \\n9e9058bdf63c: Layer already exists \\n```\\n\\nAfter you build and push the image, you can run the toolbox using this image to trace storage performance.\\n\\n## Run the toolbox\\n```bash\\n# use `privileged` flag only when you needed. 
blktrace need debugfs, so I add extra mountpoint.\\ndocker run -it --privileged -v /sys/kernel/debug/:/sys/kernel/debug/ --rm harvester/toolbox:dev bash\\n\\n# test blktrace\\n6ffa8eda3aaf:/ $ blktrace -d /dev/nvme0n1 -o - | blkparse -i -\\n259,0 10 3414 0.020814875 34084 Q WS 2414127984 + 8 [fio]\\n259,0 10 3415 0.020815190 34084 G WS 2414127984 + 8 [fio]\\n259,0 10 3416 0.020815989 34084 C WS 3206896544 + 8 [0]\\n259,0 10 3417 0.020816652 34084 C WS 2140319184 + 8 [0]\\n259,0 10 3418 0.020817992 34084 P N [fio]\\n259,0 10 3419 0.020818227 34084 U N [fio] 1\\n259,0 10 3420 0.020818437 34084 D WS 2414127984 + 8 [fio]\\n259,0 10 3421 0.020821826 34084 Q WS 1743934904 + 8 [fio]\\n259,0 10 3422 0.020822150 34084 G WS 1743934904 + 8 [fio]\\n\\n```"},{"id":"scan-and-repair-vm-root-filesystem","metadata":{"permalink":"/kb/scan-and-repair-vm-root-filesystem","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-02-01/scan_and_repair_filesystem.md","source":"@site/kb/2023-02-01/scan_and_repair_filesystem.md","title":"Scan and Repair Root Filesystem of VirtualMachine","description":"Scan and repair root filesystem of VM","date":"2023-02-01T00:00:00.000Z","formattedDate":"February 1, 2023","tags":[{"label":"storage","permalink":"/kb/tags/storage"},{"label":"longhorn","permalink":"/kb/tags/longhorn"},{"label":"root","permalink":"/kb/tags/root"},{"label":"filesystem","permalink":"/kb/tags/filesystem"}],"readingTime":3.37,"truncated":false,"authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"frontMatter":{"title":"Scan and Repair Root Filesystem of VirtualMachine","description":"Scan and repair root filesystem of VM","slug":"scan-and-repair-vm-root-filesystem","authors":[{"name":"Vicente Cheng","title":"Senior Software Engineer","url":"https://github.com/Vicente-Cheng","image_url":"https://github.com/Vicente-Cheng.png","imageURL":"https://github.com/Vicente-Cheng.png"}],"tags":["storage","longhorn","root","filesystem"],"hide_table_of_contents":false},"prevItem":{"title":"Package your own Toolbox Image","permalink":"/kb/package_your_own_toolbox_image"},"nextItem":{"title":"Evicting Replicas From a Disk (the CLI way)","permalink":"/kb/evicting-replicas-from-a-disk-the-cli-way"}},"content":"In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: [Analysis: Potential Data/Filesystem Corruption](https://longhorn.io/kb/troubleshooting-volume-filesystem-corruption/#solution)). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM\'s filesystem and repair it if needed.\\n\\n\\n## Stop The VM And Backup Volume\\n\\nBefore you scan the filesystem, it is recommend you back up the volume first. For an example, refer to the following steps to stop the VM and backup the volume.\\n\\n- Find the target VM.\\n\\n![finding the target VM](./imgs/finding_the_target_vm.png)\\n\\n- Stop the target VM.\\n\\n![Stop the target VM](./imgs/stop_the_target_vm.png)\\n\\nThe target VM is stopped and the related volumes are detached. 
Now go to the Longhorn UI to backup this volume.\\n\\n- Enable `Developer Tools & Features` (Preferences -> Enable Developer Tools & Features).\\n\\n![Preferences then enable developer mode](./imgs/preferences_enable_developer_mode.png)\\n![Enable the developer mode](./imgs/enable_the_developer_mode.png)\\n\\n- Click the `\u22ee` button and select **Edit Config** to edit the config page of the VM.\\n\\n![goto edit config page of VM](./imgs/goto_vm_edit_config_page.png)\\n\\n- Go to the `Volumes` tab and select `Check volume details.`\\n\\n![link to longhorn volume page](./imgs/link_to_longhorn_volume.png)\\n\\n- Click the dropdown menu on the right side and select \'Attach\' to attach the volume again. \\n\\n![attach this volume again](./imgs/attach_this_volume_again.png)\\n\\n- Select the attached node. \\n\\n![choose the attached node](./imgs/choose_the_attached_node.png)\\n\\n- Check the volume attached under `Volume Details` and select `Take Snapshot` on this volume page.\\n\\n![take snapshot on volume page](./imgs/take_snapshot_on_volume_page.png)\\n\\n- Confirm that the snapshot is ready.\\n\\n![check the snapshot is ready](./imgs/check_the_snapshot_is_ready.png)\\n\\nNow that you completed the volume backup, you need to scan and repair the root filesystem.\\n\\n## Scanning the root filesystem and repairing\\n\\nThis section will introduce how to scan the filesystem (e.g., XFS, EXT4) using related tools.\\n\\nBefore scanning, you need to know the filesystem\'s device/partition.\\n\\n- Identify the filesystem\'s device by checking the major and minor numbers of that device.\\n\\n1. Obtain the major and minor numbers from the listed volume information.\\n \\n In the following example, the volume name is `pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58`.\\n ```\\n harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al\\n brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58\\n ```\\n The output indicates that the major and minor numbers are `8:0`.\\n \\n2. Obtain the device name from the output of the `lsblk` command.\\n ```\\n harvester-node-0:~ # lsblk\\n NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS\\n loop0 7:0 0 3G 1 loop /\\n sda 8:0 0 40G 0 disk\\n \u251c\u2500sda1 8:1 0 2M 0 part\\n \u251c\u2500sda2 8:2 0 20M 0 part\\n \u2514\u2500sda3 8:3 0 40G 0 part\\n ```\\n The output indicates that `8:0` are the major and minor numbers of the device named `sda`. Therefore, `/dev/sda` is related to the volume named `pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58`.\\n\\n- You should now know the filesystem\'s partition. 
In the example below, sda3 is the filesystem\'s partition.\\n- Use the Filesystem toolbox image to scan and repair.\\n\\n```\\n# docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash\\n```\\n\\nThen we try to scan with this target device.\\n\\n### XFS\\n\\nWhen scanning an XFS filesystem, use the `xfs_repair` command and specify the problematic partition of the device.\\n\\nIn the following example, `/dev/sda3` is the problematic partition.\\n```\\n# xfs_repair -n /dev/sda3\\n```\\n\\nTo repair the corrupted partition, run the following command.\\n\\n```\\n# xfs_repair /dev/sda3\\n```\\n\\n### EXT4\\n\\nWhen scanning a EXT4 filesystem, use the `e2fsck` command as follows, where the `/dev/sde1` is the problematic partition of the device.\\n\\n```\\n# e2fsck -f /dev/sde1\\n```\\n\\nTo repair the corrupted partition, run the following command.\\n\\n```\\n# e2fsck -fp /dev/sde1\\n```\\n\\n\\nAfter using the \'e2fsck\' command, you should also see logs related to scanning and repairing the partition. Scanning and repairing the corrupted partition is successful if there are no errors in these logs. \\n\\n\\n## Detach and Start VM again.\\n\\nAfter the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.\\n\\n- Detach the volume from the Longhorn UI.\\n\\n![detach volume on longhorn UI](./imgs/detach_volume.png)\\n\\n- Start the related VM again from the Harvester UI.\\n\\n![Start VM again](./imgs/start_vm_again.png)\\n\\nYour VM should now work normally."},{"id":"evicting-replicas-from-a-disk-the-cli-way","metadata":{"permalink":"/kb/evicting-replicas-from-a-disk-the-cli-way","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2023-01-12/evict_replicas_from_a_disk.md","source":"@site/kb/2023-01-12/evict_replicas_from_a_disk.md","title":"Evicting Replicas From a Disk (the CLI way)","description":"Evicting replicas from a disk (the CLI way)","date":"2023-01-12T00:00:00.000Z","formattedDate":"January 12, 2023","tags":[{"label":"storage","permalink":"/kb/tags/storage"},{"label":"longhorn","permalink":"/kb/tags/longhorn"},{"label":"disk","permalink":"/kb/tags/disk"}],"readingTime":1.935,"truncated":false,"authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"frontMatter":{"title":"Evicting Replicas From a Disk (the CLI way)","description":"Evicting replicas from a disk (the CLI way)","slug":"evicting-replicas-from-a-disk-the-cli-way","authors":[{"name":"Kiefer Chang","title":"Engineer Manager","url":"https://github.com/bk201","image_url":"https://github.com/bk201.png","imageURL":"https://github.com/bk201.png"}],"tags":["storage","longhorn","disk"],"hide_table_of_contents":false},"prevItem":{"title":"Scan and Repair Root Filesystem of VirtualMachine","permalink":"/kb/scan-and-repair-vm-root-filesystem"},"nextItem":{"title":"NIC Naming Scheme","permalink":"/kb/nic-naming-scheme"}},"content":"Harvester replicates volumes data across disks in a cluster. Before removing a disk, the user needs to evict replicas on the disk to other disks to preserve the volumes\' configured availability. 
For more information about eviction in Longhorn, please check [Evicting Replicas on Disabled Disks or Nodes](https://longhorn.io/docs/1.3.2/volumes-and-nodes/disks-or-nodes-eviction/).\\n\\n## Preparation\\n\\nThis document describes how to evict Longhorn disks using the `kubectl` command. Before that, users must ensure the environment is set up correctly.\\nThere are two recommended ways to do this:\\n\\n1. Log in to any management node and switch to root (`sudo -i`).\\n1. Download Kubeconfig file and use it locally\\n - Install `kubectl` and `yq` program manually.\\n - Open Harvester GUI, click `support` at the bottom left of the page and click `Download KubeConfig` to download the Kubeconfig file.\\n - Set the Kubeconfig file\'s path to `KUBECONFIG` environment variable. For example, `export KUBECONFIG=/path/to/kubeconfig`.\\n\\n\\n## Evicting replicas from a disk\\n\\n1. List Longhorn nodes (names are identical to Kubernetes nodes):\\n\\n ```\\n kubectl get -n longhorn-system nodes.longhorn.io\\n ```\\n\\n Sample output:\\n\\n ```\\n NAME READY ALLOWSCHEDULING SCHEDULABLE AGE\\n node1 True true True 24d\\n node2 True true True 24d\\n node3 True true True 24d\\n ```\\n\\n1. List disks on a node. Assume we want to evict replicas of a disk on `node1`:\\n\\n ```\\n kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e \'.spec.disks\'\\n ```\\n\\n Sample output:\\n\\n ```\\n default-disk-ed7af10f5b8356be:\\n allowScheduling: true\\n evictionRequested: false\\n path: /var/lib/harvester/defaultdisk\\n storageReserved: 36900254515\\n tags: []\\n ```\\n\\n1. Assume disk `default-disk-ed7af10f5b8356be` is the target we want to evict replicas out of.\\n\\n Edit the node:\\n ```\\n kubectl edit -n longhorn-system nodes.longhorn.io node1 \\n ```\\n\\n Update these two fields and save:\\n - `spec.disks..allowScheduling` to `false`\\n - `spec.disks..evictionRequested` to `true`\\n\\n Sample editing:\\n\\n ```\\n default-disk-ed7af10f5b8356be:\\n allowScheduling: false\\n evictionRequested: true\\n path: /var/lib/harvester/defaultdisk\\n storageReserved: 36900254515\\n tags: []\\n ```\\n\\n1. 
Wait for all replicas on the disk to be evicted.\\n\\n Get current scheduled replicas on the disk:\\n ```\\n kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e \'.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica\'\\n ```\\n\\n Sample output:\\n ```\\n pvc-86d3d212-d674-4c64-b69b-4a2eb1df2272-r-7b422db7: 5368709120\\n pvc-b06f0b09-f30c-4936-8a2a-425b993dd6cb-r-bb0fa6b3: 2147483648\\n pvc-b844bcc6-3b06-4367-a136-3909251cb560-r-08d1ab3c: 53687091200\\n pvc-ea6e0dff-f446-4a38-916a-b3bea522f51c-r-193ca5c6: 10737418240\\n ```\\n\\n Run the command repeatedly, and the output should eventually become an empty map:\\n ```\\n {}\\n ```\\n\\n This means Longhorn evicts replicas on the disk to other disks.\\n\\n :::note\\n \\n If a replica always stays in a disk, please open the [Longhorn GUI](https://docs.harvesterhci.io/v1.1/troubleshooting/harvester#access-embedded-rancher-and-longhorn-dashboards) and check if there is free space on other disks.\\n :::"},{"id":"nic-naming-scheme","metadata":{"permalink":"/kb/nic-naming-scheme","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2022-04-06/nic_naming_scheme.md","source":"@site/kb/2022-04-06/nic_naming_scheme.md","title":"NIC Naming Scheme","description":"NIC Naming Scheme changed after upgrading to v1.0.1","date":"2022-04-06T00:00:00.000Z","formattedDate":"April 6, 2022","tags":[{"label":"network","permalink":"/kb/tags/network"}],"readingTime":1.825,"truncated":false,"authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"frontMatter":{"title":"NIC Naming Scheme","descripion":"NIC Naming Scheme Change","slug":"nic-naming-scheme","authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"tags":["network"],"hide_table_of_contents":false},"prevItem":{"title":"Evicting Replicas From a Disk (the CLI way)","permalink":"/kb/evicting-replicas-from-a-disk-the-cli-way"},"nextItem":{"title":"Multiple NICs VM Connectivity","permalink":"/kb/multiple-nics-vm-connectivity"}},"content":"## NIC Naming Scheme changed after upgrading to v1.0.1\\n\\n`systemd` in OpenSUSE Leap 15.3 which is the base OS of Harvester is upgraded to `246.16-150300.7.39.1`. In this version, `systemd` will enable additional naming scheme `sle15-sp3` which is `v238` with `bridge_no_slot`. When there is a PCI bridge associated with NIC, `systemd` will never generate `ID_NET_NAME_SLOT` and naming policy in `/usr/lib/systemd/network/99-default.link` will fallback to `ID_NET_NAME_PATH`. According to this change, NIC names might be changed in your Harvester nodes during the upgrade process from `v1.0.0` to `v1.0.1-rc1` or above, and it will cause network issues that are associated with NIC names.\\n\\n## Effect Settings and Workaround\\n\\n### Startup Network Configuration\\n\\nNIC name changes will need to update the name in `/oem/99_custom.yaml`. 
You could use [migration script](https://github.com/harvester/upgrade-helpers/blob/main/hack/udev_v238_sle15-sp3.py) to change the NIC names which are associated with a PCI bridge.\\n\\n:::tip\\nYou could find an identical machine to test naming changes before applying the configuration to production machines\\n:::\\n\\nYou could simply execute the script with root account in `v1.0.0` via\\n```bash\\n# python3 udev_v238_sle15-sp3.py\\n```\\n\\nIt will output the patched configuration to the screen and you could compare it to the original one to ensure there is no exception. (e.g. We could use `vimdiff` to check the configuration)\\n```bash\\n# python3 udev_v238_sle15-spe3.py > /oem/test\\n# vimdiff /oem/test /oem/99_custom.yaml\\n```\\n\\nAfter checking the result, we could execute the script with `--really-want-to-do` to override the configuration. It will also back up the original configuration file with a timestamp before patching it.\\n```bash\\n# python3 udev_v238_sle15-sp3.py --really-want-to-do\\n```\\n\\n### Harvester VLAN Network Configuration\\n\\nIf your VLAN network is associated with NIC name directly without `bonding`, you will need to migrate `ClusterNetwork` and `NodeNetwork` with the previous section together.\\n\\n:::note\\nIf your VLAN network is associated with the `bonding` name in `/oem/99_custom.yaml`, you could skip this section.\\n:::\\n\\n#### Modify ClusterNetworks\\n\\nYou need to modify `ClusterNetworks` via \\n```bash\\n$ kubectl edit clusternetworks vlan\\n```\\nsearch this pattern\\n```yaml\\nconfig:\\n defaultPhysicalNIC: \\n```\\nand change to new NIC name\\n\\n#### Modify NodeNetworks\\n\\nYou need to modify `NodeNetworks` via\\n```bash\\n$ kubectl edit nodenetworks -vlan\\n```\\nsearch this pattern\\n```yaml\\nspec:\\n nic: \\n```\\nand change to new NIC name"},{"id":"multiple-nics-vm-connectivity","metadata":{"permalink":"/kb/multiple-nics-vm-connectivity","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2022-03-10/multiple_nics_vm_connectivity.md","source":"@site/kb/2022-03-10/multiple_nics_vm_connectivity.md","title":"Multiple NICs VM Connectivity","description":"What is the default behavior of a VM with multiple NICs","date":"2022-03-10T00:00:00.000Z","formattedDate":"March 10, 2022","tags":[{"label":"vm","permalink":"/kb/tags/vm"},{"label":"network","permalink":"/kb/tags/network"}],"readingTime":3.955,"truncated":false,"authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"frontMatter":{"title":"Multiple NICs VM Connectivity","descripion":"How to deal VMs with multiple NICs in Harvester","slug":"multiple-nics-vm-connectivity","authors":[{"name":"Date Huang","title":"Software Engineer","url":"https://github.com/tjjh89017","image_url":"https://github.com/tjjh89017.png","imageURL":"https://github.com/tjjh89017.png"}],"tags":["vm","network"],"hide_table_of_contents":false},"prevItem":{"title":"NIC Naming Scheme","permalink":"/kb/nic-naming-scheme"},"nextItem":{"title":"VM Scheduling","permalink":"/kb/vm-scheduling"}},"content":"## What is the default behavior of a VM with multiple NICs\\n\\nIn [some scenarios](https://github.com/harvester/harvester/issues/1059), you\'ll setup two or more NICs in your VM to serve different networking purposes. If all networks are setup by default with DHCP, you might get random connectivity issues. 
And while it might get fixed after rebooting the VM, it still will lose connection randomly after some period.\\n\\n## How-to identify connectivity issues\\n\\nIn a Linux VM, you can use commands from the `iproute2` package to identify the default route.\\n\\nIn your VM, execute the following command:\\n```bash\\nip route show default\\n```\\n:::tip\\nIf you get the `access denied` error, please run the command using `sudo`\\n:::\\n \\nThe output of this command will only show the default route with the gateway and VM IP of the primary network interface (`eth0` in the example below).\\n```\\ndefault via dev eth0 proto dhcp src metric 100\\n```\\n\\nHere is the full example:\\n```\\n$ ip route show default\\ndefault via 192.168.0.254 dev eth0 proto dhcp src 192.168.0.100 metric 100\\n```\\n\\nHowever, if the issue covered in this KB occurs, you\'ll only be able to connect to the VM via the VNC or serial console.\\n\\nOnce connected, you can run again the same command as before:\\n```bash\\n$ ip route show default\\n```\\n\\nHowever, this time you\'ll get a default route with an incorrect gateway IP.\\nFor example:\\n```\\ndefault via dev eth0 proto dhcp src metric 100\\n```\\n\\n## Why do connectivity issues occur randomly\\n\\nIn a standard setup, cloud-based VMs typically use DHCP for their NICs configuration. It will set an IP and a gateway for each NIC. Lastly, a default route to the gateway IP will also be added, so you can use its IP to connect to the VM.\\n\\nHowever, Linux distributions start multiple DHCP clients at the same time and do not have a **priority** system. This means that if you have two or more NICs configured with DHCP, the client will enter a **race condition** to configure the default route. And depending on the currently running Linux distribution DHCP script, there is no guarantee which default route will be configured.\\n\\nAs the default route might change in every DHCP renewing process or after every OS reboot, this will create network connectivity issues.\\n\\n## How to avoid the random connectivity issues\\n\\nYou can easily avoid these connectivity issues by having only one NIC attached to the VM and having only one IP and one gateway configured.\\n\\nHowever, for VMs in more complex infrastructures, it is often not possible to use just one NIC. For example, if your infrastructure has a storage network and a service network. For security reasons, the storage network will be isolated from the service network and have a separate subnet. In this case, you must have two NICs to connect to both the service and storage networks.\\n\\nYou can choose a solution below that meets your requirements and security policy.\\n\\n### Disable DHCP on secondary NIC\\n\\nAs mentioned above, the problem is caused by a `race condition` between two DHCP clients. One solution to avoid this problem is to disable DHCP for all NICs and configure them with static IPs only. Likewise, you can configure the secondary NIC with a static IP and keep the primary NIC enabled with DHCP.\\n\\n1. To configure the primary NIC with a static IP (`eth0` in this example), you can edit the file `/etc/sysconfig/network/ifcfg-eth0` with the following values:\\n\\n```\\nBOOTPROTO=\'static\'\\nIPADDR=\'192.168.0.100\'\\nNETMASK=\'255.255.255.0\'\\n```\\n\\nAlternatively, if you want to reserve the primary NIC using DHCP (`eth0` in this example), use the following values instead:\\n\\n```\\nBOOTPROTO=\'dhcp\'\\nDHCLIENT_SET_DEFAULT_ROUTE=\'yes\'\\n```\\n\\n\\n2. 
You need to configure the default route by editing the file `/etc/sysconfig/network/ifroute-eth0` (if you configured the primary NIC using DHCP, skip this step):\\n\\n\\n```\\n# Destination Dummy/Gateway Netmask Interface\\ndefault 192.168.0.254 - eth0\\n```\\n\\n:::warning\\nDo not put other default route for your secondary NIC\\n:::\\n \\n3. Finally, configure a static IP for the secondary NIC by editing the file `/etc/sysconfig/network/ifcfg-eth1`:\\n\\n```\\nBOOTPROTO=\'static\'\\nIPADDR=\'10.0.0.100\'\\nNETMASK=\'255.255.255.0\'\\n```\\n\\n#### Cloud-Init config\\n\\n```yaml\\nnetwork:\\n version: 1\\n config:\\n - type: physical\\n name: eth0\\n subnets:\\n - type: dhcp\\n - type: physical\\n name: eth1\\n subnets:\\n - type: static\\n address: 10.0.0.100/24\\n```\\n \\n### Disable secondary NIC default route from DHCP\\n\\nIf your secondary NIC requires to get its IP from DHCP, you\'ll need to disable the secondary NIC default route configuration.\\n\\n1. Confirm that the primary NIC configures its default route in the file `/etc/sysconfig/network/ifcfg-eth0`:\\n\\n```\\nBOOTPROTO=\'dhcp\'\\nDHCLIENT_SET_DEFAULT_ROUTE=\'yes\'\\n```\\n\\n2. Disable the secondary NIC default route configuration by editing the file `/etc/sysconfig/network/ifcfg-eth1`:\\n\\n```\\nBOOTPROTO=\'dhcp\'\\nDHCLIENT_SET_DEFAULT_ROUTE=\'no\'\\n```\\n\\n#### Cloud-Init config\\n\\nThis solution is not available in Cloud-Init. Cloud-Init didn\'t allow any option for DHCP."},{"id":"vm-scheduling","metadata":{"permalink":"/kb/vm-scheduling","editUrl":"https://github.com/harvester/harvesterhci.io/edit/main/kb/kb/2022-03-07/vm-scheduling.md","source":"@site/kb/2022-03-07/vm-scheduling.md","title":"VM Scheduling","description":"How does Harvester schedule VMs?","date":"2022-03-07T00:00:00.000Z","formattedDate":"March 7, 2022","tags":[{"label":"vm","permalink":"/kb/tags/vm"},{"label":"scheduling","permalink":"/kb/tags/scheduling"}],"readingTime":15.44,"truncated":false,"authors":[{"name":"PoAn Yang","title":"Software Engineer","url":"https://github.com/FrankYang0529","image_url":"https://github.com/FrankYang0529.png","imageURL":"https://github.com/FrankYang0529.png"}],"frontMatter":{"title":"VM Scheduling","description":"How does Harvester schedule VMs?","slug":"vm-scheduling","authors":[{"name":"PoAn Yang","title":"Software Engineer","url":"https://github.com/FrankYang0529","image_url":"https://github.com/FrankYang0529.png","imageURL":"https://github.com/FrankYang0529.png"}],"tags":["vm","scheduling"],"hide_table_of_contents":false},"prevItem":{"title":"Multiple NICs VM Connectivity","permalink":"/kb/multiple-nics-vm-connectivity"}},"content":"## How does Harvester schedule a VM?\\n\\nHarvester doesn\'t directly schedule a VM in Kubernetes, it relies on [KubeVirt](http://kubevirt.io/) to create the custom resource `VirtualMachine`. When the request to create a new VM is sent, a `VirtualMachineInstance` object is created and it creates the corresponding `Pod`.\\n\\nThe whole VM creation processt leverages `kube-scheduler`, which allows Harvester to use `nodeSelector`, `affinity`, and resources request/limitation to influence where a VM will be deployed.\\n\\n## How does kube-scheduler decide where to deploy a VM?\\n\\nFirst, `kube-scheduler` finds Nodes available to run a pod. 
After that, `kube-scheduler` scores each available Node by a list of [plugins](https://github.com/kubernetes/kubernetes/tree/v1.22.7/pkg/scheduler/framework/plugins) like [ImageLocality](https://github.com/kubernetes/kubernetes/blob/v1.22.7/pkg/scheduler/framework/plugins/imagelocality/image_locality.go), [InterPodAffinity](https://github.com/kubernetes/kubernetes/tree/v1.22.7/pkg/scheduler/framework/plugins/interpodaffinity), [NodeAffinity](https://github.com/kubernetes/kubernetes/tree/v1.22.7/pkg/scheduler/framework/plugins/nodeaffinity), etc. \\n\\nFinally, `kube-scheduler` calculates the scores from the plugins results for each Node, and select the Node with the highest score to deploy the Pod.\\n\\nFor example, let\'s say we have a three nodes Harvester cluster with 6 cores CPU and 16G RAM each, and we want to deploy a VM with 1 CPU and 1G RAM (without resources overcommit). \\n\\n`kube-scheduler` will summarize the scores, as displayed in _Table 1_ below, and will select the node with the highest score, `harvester-node-2` in this case, to deploy the VM.\\n\\n
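To make the scoring step concrete, here is a short worked sum derived from the per-plugin values in _Table 1_ below (an illustration only, not an extra log excerpt): a node\'s final score is simply the sum of its plugin scores, so for `harvester-node-2` it is 54 + 0 + 37 + 0 + 1000000 + 200 + 100 + 46 = 1000437, the highest of the three totals, which is why the VM is placed on that node.\\n\\n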
\\n kube-scheduler logs\\n\\n```\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,\\n\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,\\nvirt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=4\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=34\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=37\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" 
score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=45\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=46\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-0\\" score=1000358\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-1\\" score=1000433\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-2\\" score=1000437\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm-without-overcommit-75q9b\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm-without-overcommit-75q9b\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm-without-overcommit-75q9b\\" node=\\"harvester-node-2\\"\\n```\\n
\\n\\n**Table 1 - kube-scheduler scores example**\\n\\n| | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-------------------------------:|:----------------:|:----------------:|:----------------:|\\n| ImageLocality | 54 | 54 | 54 |\\n| InterPodAffinity | 0 | 0 | 0 |\\n| NodeResourcesLeastAllocated | 4 | 34 | 37 |\\n| NodeAffinity | 0 | 0 | 0 |\\n| NodePreferAvoidPods | 1000000 | 1000000 | 1000000 |\\n| PodTopologySpread | 200 | 200 | 200 |\\n| TaintToleration | 100 | 100 | 100 |\\n| NodeResourcesBalancedAllocation | 0 | 45 | 46 |\\n| Total | 1000358 | 1000433 | 1000437 |\\n\\n## Why are VMs distributed unevenly with overcommit?\\n\\nWith resource overcommit, Harvester modifies the resource requests. By default, the `overcommit` configuration is `{\\"cpu\\": 1600, \\"memory\\": 150, \\"storage\\": 200}`. This means that if we request a VM with 1 CPU and 1G RAM, its `resources.requests.cpu` becomes `62m`. \\n\\n!!! note\\n The unit suffix `m` stands for \\"thousandth of a core.\\"\\n\\nTo explain it, let\'s take the case of CPU overcommit. The default value of 1 CPU is equal to 1000m CPU, and with the default overcommit configuration of `\\"cpu\\": 1600`, the CPU request becomes 16x smaller. Here is the calculation: `1000m * 100 / 1600 = 62m`.\\n\\nNow, we can see how overcommitting influences the `kube-scheduler` scores.\\n\\nIn this example, we use a three-node Harvester cluster with 6 cores and 16G RAM on each node. We will deploy two VMs with 1 CPU and 1G RAM each, and we will compare the scores for the \\"with-overcommit\\" and \\"without-overcommit\\" cases. \\n\\nThe results in _Table 2_ and _Table 3_ can be explained as follows:\\n\\nIn the \\"with-overcommit\\" case, both VMs are deployed on `harvester-node-2`, whereas in the \\"without-overcommit\\" case, VM 1 is deployed on `harvester-node-2` and VM 2 is deployed on `harvester-node-1`. \\n\\nIf we look at the detailed scores, we\'ll see the `Total Score` for `harvester-node-2` move from `1000459` to `1000461` in the \\"with-overcommit\\" case, and from `1000437` to `1000382` in the \\"without-overcommit\\" case. This is because resource overcommit changes `request-cpu` and `request-memory`. \\n\\nIn the \\"with-overcommit\\" case, the `request-cpu` changes from `4412m` to `4474m`. The difference between the two numbers is `62m`, which is what we calculated above. However, in the \\"without-overcommit\\" case, we send **real** requests to `kube-scheduler`, so the `request-cpu` changes from `5350m` to `6350m`.\\n\\nFinally, since every plugin except `NodeResourcesBalancedAllocation` and `NodeResourcesLeastAllocated` gives the same score to each node, the difference between nodes comes from these two plugins.\\n\\nFrom the results, we can see that the overcommit feature influences the final score of each node, so VMs are distributed unevenly. Although the `harvester-node-2` score for VM 2 is higher than for VM 1, it does not always keep increasing. In _Table 4_, we keep deploying VMs with 1 CPU and 1G RAM, and we can see the score of `harvester-node-2` start to decrease from the 11th VM. The behavior of `kube-scheduler` depends on your cluster resources and the workloads you deploy.\\n\\n
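\\nAs a quick check of the formula above, the same arithmetic can be reproduced in a shell. Only the CPU case is spelled out in this article; the memory line is an assumption obtained by applying the same `value * 100 / ratio` formula to the default memory ratio of `150`.\\n\\n```\\n# CPU: 1 CPU = 1000m, default overcommit ratio 1600\\necho $(( 1000 * 100 / 1600 ))m    # prints 62m, the scheduled CPU request\\n# Memory (assumed to follow the same formula): 1Gi = 1024Mi, default ratio 150\\necho $(( 1024 * 100 / 150 ))Mi    # prints 682Mi\\n```\\n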
\\n kube-scheduler logs for vm1-with-overcommit\\n\\n```\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 59,\\n\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,\\nvirt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 46,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=5\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=43\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=46\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" 
score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=58\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=59\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-0\\" score=1000359\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-1\\" score=1000455\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-2\\" score=1000459\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-ljlmq\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-ljlmq\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm1-with-overcommit-ljlmq\\" node=\\"harvester-node-2\\"\\n```\\n
\\n\\n
\\n kube-scheduler logs for vm2-with-overcommit\\n\\n```\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 64,\\n\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,\\nvirt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 43,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" 
score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=58\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=64\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=5\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=43\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=43\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-0\\" score=1000359\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-1\\" score=1000455\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-2\\" score=1000461\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-with-overcommit-pwrx4\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-with-overcommit-pwrx4\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm2-with-overcommit-pwrx4\\" node=\\"harvester-node-2\\"\\n```\\n
\\n\\n
\\n kube-scheduler logs for vm1-without-overcommit\\n\\n```\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,\\n\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,\\nvirt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=4\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=34\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=37\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" 
score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=45\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=46\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-0\\" score=1000358\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-1\\" score=1000433\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-2\\" score=1000437\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-6xqmq\\", node \\"harvester-node-2\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm1-with-overcommit-6xqmq\\", node \\"harvester-node-2\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm1-with-overcommit-6xqmq\\" node=\\"harvester-node-2\\"\\n```\\n
\\n\\n
\\n kube-scheduler logs for vm2-without-overcommit\\n\\n```\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 0,\\n\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,\\nvirt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 28,\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-0\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-1\\" score=200\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"PodTopologySpread\\" node=\\"harvester-node-2\\" score=200\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-0\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-1\\" score=100\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"TaintToleration\\" node=\\"harvester-node-2\\" score=100\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-1\\" score=45\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesBalancedAllocation\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-0\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-1\\" score=54\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"ImageLocality\\" node=\\"harvester-node-2\\" score=54\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" 
plugin=\\"InterPodAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"InterPodAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-0\\" score=4\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-1\\" score=34\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeResourcesLeastAllocated\\" node=\\"harvester-node-2\\" score=28\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-0\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-1\\" score=0\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodeAffinity\\" node=\\"harvester-node-2\\" score=0\\n\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-0\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-1\\" score=1000000\\n\\"Plugin scored node for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" plugin=\\"NodePreferAvoidPods\\" node=\\"harvester-node-2\\" score=1000000\\n\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-0\\" score=1000358\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-1\\" score=1000433\\n\\"Calculated node\'s final score for pod\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-2\\" score=1000382\\n\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-without-overcommit-mf5vk\\", node \\"harvester-node-1\\"\\nAssumePodVolumes for pod \\"default/virt-launcher-vm2-without-overcommit-mf5vk\\", node \\"harvester-node-1\\": all PVCs bound and nothing to do\\n\\"Attempting to bind pod to node\\" pod=\\"default/virt-launcher-vm2-without-overcommit-mf5vk\\" node=\\"harvester-node-1\\"\\n```\\n
\\n\\n**Table 2 - With Overcommit**\\n\\n| VM 1 / VM 2 | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-------------------------------------:|--------------------------:|------------------------:|------------------------:|\\n| request-cpu (m) | 9022 / 9022 | 4622 / 4622 | **4412** / **4474** |\\n| request-memory | 14807289856 / 14807289856 | 5992960000 / 5992960000 | **5581918208** / **6476701696** |\\n| NodeResourcesBalancedAllocation Score | 0 / 0 | 58 / 58 | **59** / **64** |\\n| NodeResourcesLeastAllocated Score | 5 / 5 | 43 / 43 | **46** / **43** |\\n| Other Scores | 1000354 / 1000354 | 1000354 / 1000354 | 1000354 / 1000354 |\\n| Total Score | 1000359 / 1000359 | 1000455 / 1000455 | **1000459** / **1000461** |\\n\\n**Table 3 - Without Overcommit**\\n\\n| VM 1 / VM 2 | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-------------------------------------:|--------------------------:|------------------------:|------------------------:|\\n| request-cpu (m) | 9960 / 9960 | 5560 / **5560** | **5350** / 6350 |\\n| request-memory | 15166603264 / 15166603264 | 6352273408 / **6352273408** | **5941231616** / 7195328512 |\\n| NodeResourcesBalancedAllocation Score | 0 / 0 | 45 / **45** | **46** / 0 |\\n| NodeResourcesLeastAllocated Score | 4 / 4 | 34 / **34** | **37** / 28 |\\n| Other Scores | 1000354 / 1000354 | 1000354 / **1000354** | **1000354** / 1000354 |\\n| Total Score | 1000358 / 1000358 | 1000433 / **1000433** | **1000437** / 1000382 |\\n\\n**Table 4**\\n\\n| Score | harvester-node-0 | harvester-node-1 | harvester-node-2 |\\n|:-----:|-----------------:|-----------------:|-----------------:|\\n| VM 1 | 1000359 | 1000455 | 1000459 |\\n| VM 2 | 1000359 | 1000455 | 1000461 |\\n| VM 3 | 1000359 | 1000455 | 1000462 |\\n| VM 4 | 1000359 | 1000455 | 1000462 |\\n| VM 5 | 1000359 | 1000455 | 1000463 |\\n| VM 6 | 1000359 | 1000455 | 1000465 |\\n| VM 7 | 1000359 | 1000455 | 1000466 |\\n| VM 8 | 1000359 | 1000455 | 1000467 |\\n| VM 9 | 1000359 | 1000455 | 1000469 |\\n| VM 10 | 1000359 | 1000455 | 1000469 |\\n| VM 11 | 1000359 | 1000455 | **1000465** |\\n| VM 12 | 1000359 | 1000455 | **1000457** |\\n\\n\\n## How to avoid uneven distribution of VMs?\\n\\n`kube-scheduler` has many plugins whose scores we can influence. For example, we can add a `podAntiAffinity` rule (scored by the `InterPodAffinity` plugin) so that VMs with the same label avoid being deployed on the same node.\\n\\n```\\naffinity:\\n  podAntiAffinity:\\n    preferredDuringSchedulingIgnoredDuringExecution:\\n    - podAffinityTerm:\\n        labelSelector:\\n          matchExpressions:\\n          - key: harvesterhci.io/creator\\n            operator: Exists\\n        topologyKey: kubernetes.io/hostname\\n      weight: 100\\n```\\n\\n## How to see scores in kube-scheduler?\\n\\n`kube-scheduler` is deployed as a static pod in Harvester. The manifest is located at `/var/lib/rancher/rke2/agent/pod-manifests/kube-scheduler.yaml` on each management node. 
We can add `- --v=10` to the `kube-scheduler` container to show score logs.\\n\\n```\\nkind: Pod\\nmetadata:\\n labels:\\n component: kube-scheduler\\n tier: control-plane\\n name: kube-scheduler\\n namespace: kube-system\\nspec:\\n containers:\\n - command:\\n - kube-scheduler\\n # ...\\n - --v=10\\n```"}]}')}}]); \ No newline at end of file diff --git a/assets/js/runtime~main.89f7ad10.js b/assets/js/runtime~main.f5155a29.js similarity index 97% rename from assets/js/runtime~main.89f7ad10.js rename to assets/js/runtime~main.f5155a29.js index f01734ab..c53c22c6 100644 --- a/assets/js/runtime~main.89f7ad10.js +++ b/assets/js/runtime~main.f5155a29.js @@ -1 +1 @@ -!function(){"use strict";var e,f,c,a,d,b={},t={};function r(e){var f=t[e];if(void 0!==f)return f.exports;var c=t[e]={id:e,loaded:!1,exports:{}};return b[e].call(c.exports,c,c.exports,r),c.loaded=!0,c.exports}r.m=b,r.c=t,e=[],r.O=function(f,c,a,d){if(!c){var b=1/0;for(i=0;i=d)&&Object.keys(r.O).every((function(e){return r.O[e](c[n])}))?c.splice(n--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[c,a,d]},r.n=function(e){var f=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(f,{a:f}),f},c=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},r.t=function(e,a){if(1&a&&(e=this(e)),8&a)return e;if("object"==typeof e&&e){if(4&a&&e.__esModule)return e;if(16&a&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var b={};f=f||[null,c({}),c([]),c(c)];for(var t=2&a&&e;"object"==typeof t&&!~f.indexOf(t);t=c(t))Object.getOwnPropertyNames(t).forEach((function(f){b[f]=function(){return e[f]}}));return b.default=function(){return e},r.d(d,b),d},r.d=function(e,f){for(var c in f)r.o(f,c)&&!r.o(e,c)&&Object.defineProperty(e,c,{enumerable:!0,get:f[c]})},r.f={},r.e=function(e){return Promise.all(Object.keys(r.f).reduce((function(f,c){return 
r.f[c](e,f),f}),[]))},r.u=function(e){return"assets/js/"+({53:"935f2afb",64:"dfe5bdea",91:"8df36664",200:"96f60120",216:"8193d050",497:"3ba2c23c",508:"cddb2b43",710:"f0c91217",904:"e28f6d09",931:"243a84ef",996:"df662e3a",1112:"d2131cf8",1132:"70ac88c9",1205:"aa3f5c29",1227:"91cce478",1342:"433aded9",1397:"f7476351",1477:"b2f554cd",1619:"b43ca6ce",1697:"d515d139",1738:"8d046448",1742:"3acd4763",1763:"911e74ee",1819:"ca8e3d06",2041:"2a998ccf",2142:"55e5ed55",2247:"a7ecf002",2285:"9090cdf0",2332:"3916281a",2501:"37f61a63",2574:"c91f9634",2582:"667cfcf0",2587:"4158182c",2656:"0e8a02fe",2672:"7db59f7b",2776:"276be20f",2931:"7c3e736a",3006:"4c2f44cf",3076:"5b621466",3081:"fabbf3b9",3085:"1f391b9e",3089:"a6aa9e1f",3238:"340e0589",3303:"f4cd6279",3323:"f1033f2d",3531:"803c5e79",3571:"cd1f88af",3591:"a72d5d29",3608:"9e4087bc",3612:"fa8bddad",3664:"4eaec003",3693:"f12c7264",3961:"784e61c2",4013:"01a85c17",4195:"c4f5d8e4",4215:"916d1613",4355:"a4e89433",4507:"7fd1b631",4671:"0fde8c5c",4725:"e3dc72db",4950:"7a1ef0d5",4964:"28940e03",5028:"0391fca0",5187:"7d456a76",5257:"0855f3c9",5309:"d7912600",5391:"ef776918",5396:"96b7d83e",5417:"b51a971d",5450:"e2034994",5612:"500c69b0",5910:"97fcede1",5992:"3ad82185",6103:"ccc49370",6115:"406c8296",6408:"660b8ac9",6486:"364f618a",6653:"9d72eb39",6718:"083e3fb7",6760:"e1ac9cc6",6995:"721aff08",7012:"44234b90",7168:"6f31d0d1",7368:"85e739f5",7414:"393be207",7575:"0a346684",7604:"4c315a06",7686:"e4ac00f7",7717:"d1eefc28",7869:"74ed3cfa",7918:"17896441",8082:"0fcd7fb0",8094:"670bc9ec",8134:"be6024aa",8427:"6c37d39f",8433:"72258991",8610:"6875c492",8628:"09daad34",8669:"f7a75ad6",8964:"bbd95991",8969:"2f202bf6",8971:"5ab486be",8987:"1ead3a54",9140:"f156f67b",9168:"625b1ed7",9175:"8f2b3fc3",9176:"a01d9880",9377:"ae7bdb39",9398:"2f9a356c",9458:"6bb361e2",9462:"b527f526",9514:"1be78505",9526:"60c5d1f6",9671:"0e384e19",9762:"ee9d51b9",9828:"a02d39cc"}[e]||e)+"."+{53:"75cfd3ab",64:"aa8ca877",91:"311d578b",200:"afa6e16a",216:"6471390c",497:"045de022",508:"356e5699",710:"f941a90c",904:"181a5ca5",931:"b6a367e8",996:"efb1b6da",1112:"ad7dcf4a",1132:"52d89371",1205:"21eed176",1227:"7f528607",1342:"15f44e56",1397:"bc73ea55",1477:"3d45286b",1619:"90a2408d",1697:"0b704b21",1738:"c8e05a0b",1742:"2c8ae208",1763:"57c1a24d",1819:"51d08b52",2041:"fdc32e0f",2142:"89dfc0b2",2247:"a748b538",2285:"9c2b497f",2332:"78dbd7c6",2501:"964456b9",2574:"13bb4a54",2582:"d0f754c9",2587:"6aab133b",2656:"e425198b",2672:"194d4366",2776:"2de50f35",2931:"15a8de0a",3006:"129c6a45",3076:"22e2ef95",3081:"306fe095",3085:"51955a00",3089:"1e4f55da",3238:"055a1896",3303:"6158d051",3323:"967dc3fd",3531:"e74bbc6c",3571:"f3955052",3591:"667e7eac",3608:"d2860752",3612:"639cc26e",3664:"4d870272",3693:"f9fa0bdd",3961:"46b1b3cc",4013:"0a28a109",4195:"b133e444",4215:"32055b79",4355:"4be9a348",4507:"d2dc4fb7",4608:"84e94bf0",4671:"42208b82",4725:"b8a0006f",4950:"c51cb8b4",4964:"f9c541fe",5028:"1d03ee94",5187:"d9bea0af",5257:"0766a8a2",5309:"50381bb7",5391:"5e7f11e9",5396:"58a39c65",5417:"f4e248f6",5450:"a8564c02",5612:"8cd35636",5897:"7bbca8a3",5910:"e409db1d",5992:"936cf4b6",6103:"17d10aee",6115:"d00a2805",6408:"8a9d1f11",6486:"6ae04147",6653:"6f6fde0c",6718:"e7ff50ec",6760:"32dce01a",6995:"34cb3cc8",7012:"ce8ebb69",7168:"0b25ebb0",7368:"bc1a5b1c",7414:"aaf1357b",7575:"8eedd010",7604:"77880117",7686:"10625468",7717:"b453b5ba",7869:"30159960",7918:"2817560e",8082:"51548e29",8094:"55dd8a39",8134:"f203fad5",8427:"45115332",8433:"54b73783",8610:"b46e83e4",8628:"2c06d0f0",8669:"3c60a155",8964:"62c0f4e7",8969:"23fa802a",8971:"4
e485ea7",8987:"2f79fed7",9140:"cb788b6a",9168:"067a7151",9175:"0209b3c1",9176:"c9c49266",9377:"3c39f6b5",9398:"ede4dc9d",9458:"3d31446c",9462:"94003978",9514:"32360d1f",9526:"2b642e26",9671:"143827b2",9762:"b46fbf90",9828:"fe4a0b47"}[e]+".js"},r.miniCssF=function(e){},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=function(e,f){return Object.prototype.hasOwnProperty.call(e,f)},a={},d="harvesterhci.io:",r.l=function(e,f,c,b){if(a[e])a[e].push(f);else{var t,n;if(void 0!==c)for(var o=document.getElementsByTagName("script"),i=0;i=d)&&Object.keys(r.O).every((function(e){return r.O[e](c[n])}))?c.splice(n--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[c,a,d]},r.n=function(e){var f=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(f,{a:f}),f},c=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},r.t=function(e,a){if(1&a&&(e=this(e)),8&a)return e;if("object"==typeof e&&e){if(4&a&&e.__esModule)return e;if(16&a&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var b={};f=f||[null,c({}),c([]),c(c)];for(var t=2&a&&e;"object"==typeof t&&!~f.indexOf(t);t=c(t))Object.getOwnPropertyNames(t).forEach((function(f){b[f]=function(){return e[f]}}));return b.default=function(){return e},r.d(d,b),d},r.d=function(e,f){for(var c in f)r.o(f,c)&&!r.o(e,c)&&Object.defineProperty(e,c,{enumerable:!0,get:f[c]})},r.f={},r.e=function(e){return Promise.all(Object.keys(r.f).reduce((function(f,c){return r.f[c](e,f),f}),[]))},r.u=function(e){return"assets/js/"+({53:"935f2afb",64:"dfe5bdea",91:"8df36664",200:"96f60120",216:"8193d050",497:"3ba2c23c",508:"cddb2b43",710:"f0c91217",904:"e28f6d09",931:"243a84ef",996:"df662e3a",1112:"d2131cf8",1132:"70ac88c9",1205:"aa3f5c29",1227:"91cce478",1342:"433aded9",1397:"f7476351",1477:"b2f554cd",1619:"b43ca6ce",1697:"d515d139",1738:"8d046448",1742:"3acd4763",1763:"911e74ee",1819:"ca8e3d06",2041:"2a998ccf",2142:"55e5ed55",2247:"a7ecf002",2285:"9090cdf0",2332:"3916281a",2501:"37f61a63",2574:"c91f9634",2582:"667cfcf0",2587:"4158182c",2656:"0e8a02fe",2672:"7db59f7b",2776:"276be20f",2931:"7c3e736a",3006:"4c2f44cf",3076:"5b621466",3081:"fabbf3b9",3085:"1f391b9e",3089:"a6aa9e1f",3238:"340e0589",3303:"f4cd6279",3323:"f1033f2d",3531:"803c5e79",3571:"cd1f88af",3591:"a72d5d29",3608:"9e4087bc",3612:"fa8bddad",3664:"4eaec003",3693:"f12c7264",3961:"784e61c2",4013:"01a85c17",4195:"c4f5d8e4",4215:"916d1613",4355:"a4e89433",4507:"7fd1b631",4671:"0fde8c5c",4725:"e3dc72db",4950:"7a1ef0d5",4964:"28940e03",5028:"0391fca0",5187:"7d456a76",5257:"0855f3c9",5309:"d7912600",5391:"ef776918",5396:"96b7d83e",5417:"b51a971d",5450:"e2034994",5612:"500c69b0",5910:"97fcede1",5992:"3ad82185",6103:"ccc49370",6115:"406c8296",6408:"660b8ac9",6486:"364f618a",6653:"9d72eb39",6718:"083e3fb7",6760:"e1ac9cc6",6995:"721aff08",7012:"44234b90",7168:"6f31d0d1",7368:"85e739f5",7414:"393be207",7575:"0a346684",7604:"4c315a06",7686:"e4ac00f7",7717:"d1eefc28",7869:"74ed3cfa",7918:"17896441",8082:"0fcd7fb0",8094:"670bc9ec",8134:"be6024aa",8427:"6c37d39f",8433:"72258991",8610:"6875c492",8628:"09daad34",8669:"f7a75ad6",8964:"bbd95991",8969:"2f202bf6",8971:"5ab486be",8987:"1ead3a54",9140:"f156f67b",9168:"625b1ed7",9175:"8f2b3fc3",9176:"a01d9880",9377:"ae7bdb39",9398:"2f9a356c",9458:"6bb361e2",9462:"b527f526",9514:"1be78505",9526:"60c5d1f6",9671:"0e384e19",9762:"ee9d51b9",9828:"a02d39cc"}[e]||e)+"."+{53:"75cfd3ab",64:"aa8ca877",91:"311
d578b",200:"afa6e16a",216:"6471390c",497:"045de022",508:"356e5699",710:"f941a90c",904:"181a5ca5",931:"0b2fd61c",996:"efb1b6da",1112:"ad7dcf4a",1132:"52d89371",1205:"21eed176",1227:"7f528607",1342:"15f44e56",1397:"bc73ea55",1477:"3d45286b",1619:"90a2408d",1697:"0b704b21",1738:"c8e05a0b",1742:"2c8ae208",1763:"57c1a24d",1819:"51d08b52",2041:"fdc32e0f",2142:"89dfc0b2",2247:"a748b538",2285:"9c2b497f",2332:"78dbd7c6",2501:"964456b9",2574:"13bb4a54",2582:"d0f754c9",2587:"6aab133b",2656:"e425198b",2672:"194d4366",2776:"2de50f35",2931:"15a8de0a",3006:"129c6a45",3076:"22e2ef95",3081:"306fe095",3085:"51955a00",3089:"1e4f55da",3238:"055a1896",3303:"6158d051",3323:"967dc3fd",3531:"e74bbc6c",3571:"f3955052",3591:"667e7eac",3608:"d2860752",3612:"639cc26e",3664:"4d870272",3693:"f9fa0bdd",3961:"46b1b3cc",4013:"0a28a109",4195:"b133e444",4215:"32055b79",4355:"4be9a348",4507:"d2dc4fb7",4608:"84e94bf0",4671:"42208b82",4725:"b8a0006f",4950:"c0159de7",4964:"f9c541fe",5028:"1d03ee94",5187:"d9bea0af",5257:"0766a8a2",5309:"50381bb7",5391:"5e7f11e9",5396:"58a39c65",5417:"f4e248f6",5450:"a8564c02",5612:"8cd35636",5897:"7bbca8a3",5910:"e409db1d",5992:"936cf4b6",6103:"17d10aee",6115:"d00a2805",6408:"8a9d1f11",6486:"6ae04147",6653:"6f6fde0c",6718:"e7ff50ec",6760:"32dce01a",6995:"34cb3cc8",7012:"ce8ebb69",7168:"0b25ebb0",7368:"bc1a5b1c",7414:"aaf1357b",7575:"8eedd010",7604:"77880117",7686:"10625468",7717:"b453b5ba",7869:"30159960",7918:"2817560e",8082:"51548e29",8094:"55dd8a39",8134:"f203fad5",8427:"45115332",8433:"54b73783",8610:"b46e83e4",8628:"2c06d0f0",8669:"3c60a155",8964:"62c0f4e7",8969:"23fa802a",8971:"4e485ea7",8987:"2f79fed7",9140:"cb788b6a",9168:"067a7151",9175:"0209b3c1",9176:"c9c49266",9377:"3c39f6b5",9398:"b42bd147",9458:"3d31446c",9462:"94003978",9514:"32360d1f",9526:"2b642e26",9671:"143827b2",9762:"b46fbf90",9828:"fe4a0b47"}[e]+".js"},r.miniCssF=function(e){},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=function(e,f){return Object.prototype.hasOwnProperty.call(e,f)},a={},d="harvesterhci.io:",r.l=function(e,f,c,b){if(a[e])a[e].push(f);else{var t,n;if(void 0!==c)for(var o=document.getElementsByTagName("script"),i=0;i Authentication | The open-source hyperconverged infrastructure solution for a cloud-native world - +

Authentication

Available as of v0.2.0

With the ISO installation mode, users will be prompted to set the password for the default admin user on first-time login.

auth

The Harvester login page is shown below:

auth

Developer Mode

In developer mode, which is intended only for development and testing purposes, more authentication modes are configurable using the environment variable HARVESTER_AUTHENTICATION_MODE.

By default, the Harvester Dashboard uses the local auth mode for authentication. The default username and password are admin/password.

The currently supported options are localUser (the same as local auth mode) and kubernetesCredentials.

If the kubernetesCredentials authentication option is used, either a kubeconfig file or a bearer token can be used to access Harvester.
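As a sketch, the variable can be set as an environment variable on the Harvester API deployment. Only the variable name and the two mode names come from this page; the surrounding manifest fields and the idea of combining both modes in one comma-separated value are assumptions:

env:
  - name: HARVESTER_AUTHENTICATION_MODE
    value: localUser,kubernetesCredentials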

- + \ No newline at end of file diff --git a/blog/archive/index.html b/blog/archive/index.html index 7e049f90..d3f4d14b 100644 --- a/blog/archive/index.html +++ b/blog/archive/index.html @@ -9,13 +9,13 @@ Archive | The open-source hyperconverged infrastructure solution for a cloud-native world - + - + \ No newline at end of file diff --git a/development/dev-mode/index.html b/development/dev-mode/index.html index 6c2acf98..a9155f79 100644 --- a/development/dev-mode/index.html +++ b/development/dev-mode/index.html @@ -9,7 +9,7 @@ Developer Mode Installation | The open-source hyperconverged infrastructure solution for a cloud-native world - + @@ -17,7 +17,7 @@

Developer Mode Installation

Developer mode (dev mode) is intended to be used for testing and development purposes.

Note: This video shows the dev mode installation.

Requirements

  • For dev mode, it is assumed that Multus is installed across your cluster and that a corresponding NetworkAttachmentDefinition CRD has been created.
  • For dev mode, if you are using an RKE cluster, please ensure that ipv4.ip_forward is enabled for the CNI plugin so that the pod network works as expected (a sysctl sketch follows this list). #94.
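A minimal sketch of enabling IPv4 forwarding on a node with sysctl is shown below; how the setting is propagated to the CNI plugin itself depends on your RKE and CNI configuration, and the sysctl.d file name is arbitrary:

sudo sysctl -w net.ipv4.ip_forward=1                                        # enable immediately
echo 'net.ipv4.ip_forward = 1' | sudo tee /etc/sysctl.d/90-ip-forward.conf  # persist across reboots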

Install as an App

Harvester can be installed on a Kubernetes cluster in the following ways:

  • Install with the Helm CLI
  • Install as a Rancher catalog app, in which case the harvester/harvester repo is added to the Rancher Catalog as a Helm v3 app

Please refer to the Harvester Helm chart for more details on installing and configuring the Helm chart.

Requirements

The Kubernetes node must have hardware virtualization support.

To validate the support, use this command:

cat /proc/cpuinfo | grep vmx
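The vmx flag is Intel-specific; on AMD hosts the flag is svm. A variant that covers both (a non-zero count means the virtualization extensions are present):

grep -E -c '(vmx|svm)' /proc/cpuinfo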

Option 1: Install using Helm

  1. Clone the GitHub repository:

    $ git clone https://github.com/harvester/harvester.git --depth=1
  2. Go to the Helm chart:

    $ cd harvester/deploy/charts
  3. Install the Harvester chart with the following commands:

### To install the chart with the release name `harvester`:

## Create the target namespace
$ kubectl create ns harvester-system

## Install the chart to the target namespace
$ helm install harvester harvester \
--namespace harvester-system \
--set longhorn.enabled=true,minio.persistence.storageClass=longhorn

Option 2: Install using Rancher

Tip: You can create a testing Kubernetes environment in Rancher using the Digital Ocean cloud provider. For details, see this section.

  1. Add the Harvester repo https://github.com/harvester/harvester to your Rancher catalogs by clicking Global > Tools > Catalogs.
  2. Specify the URL and name. Set the branch to stable if you need a stable release version. Set the Helm version to be Helm v3. harvester-catalog.png
  3. Click Create.
  4. Navigate to your project-level Apps.
  5. Click Launch and choose the Harvester app.
  6. (Optional) You can modify the configurations if needed. Otherwise, use the default options.
  7. Click Launch and wait for the app's components to be ready.
  8. Click the /index.html link to navigate to the Harvester UI: harvester-app.png

Digital Ocean Test Environment

Digital Ocean is one of the cloud providers that support nested virtualization by default.

You can create a testing Kubernetes environment in Rancher using the Digital Ocean cloud provider.

We recommend using an 8-core, 16 GB RAM node, which will have nested virtualization enabled by default.

This screenshot shows how to create a Rancher node template that would allow Rancher to provision such a node in Digital Ocean:

do.png

For more information on how to launch Digital Ocean nodes with Rancher, refer to the Rancher documentation.

- + \ No newline at end of file diff --git a/harvester-network/index.html b/harvester-network/index.html index 2859ecf6..7401f825 100644 --- a/harvester-network/index.html +++ b/harvester-network/index.html @@ -9,13 +9,13 @@ Harvester Network | The open-source hyperconverged infrastructure solution for a cloud-native world - +

Harvester Network

Summary

Harvester is built on Kubernetes, which uses CNI as an interface between network providers and Kubernetes pod networking. Naturally, we implement the Harvester network based on CNI. Moreover, the Harvester UI integrates the Harvester network to provide a user-friendly way to configure networks for VMs.

As of version 0.2, Harvester supports two kinds of networks:

  • management network
  • VLAN

Implementation

Management Network

Harvester adopts flannel as the default CNI to implement the management network. It's an internal network, which means users can access a VM's management network only from within the cluster's nodes or pods.

VLAN

Harvester network-controller leverages the multus and bridge CNI plugins to implement the VLAN.

Below is a use case of the VLAN in Harvester.

  • Harvester network-controller uses a bridge per node and a veth pair per VM to implement the VLAN. The bridge acts as a switch that forwards network traffic to and from VMs, and the veth pair acts like the ports connecting the VMs to that switch. (A sample NetworkAttachmentDefinition is sketched after this list.)
  • VMs within the same VLAN are able to communicate with each other, while VMs in different VLANs cannot.
  • The external switch ports connected to the hosts or other devices (such as the DHCP server) should be set to trunk or hybrid mode and must permit the specified VLANs.
  • Users can use a VLAN with the PVID (default 1) to communicate with normal untagged traffic.
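For reference, the kind of multus NetworkAttachmentDefinition that backs such a VLAN looks roughly like the following. The object name, namespace, bridge name, and VLAN ID are illustrative assumptions rather than values taken from this page:

apiVersion: k8s.cni.cncf.io/v1
kind: NetworkAttachmentDefinition
metadata:
  name: vlan100          # illustrative network name
  namespace: default
spec:
  # "type": "bridge" selects the bridge CNI plugin mentioned above;
  # "vlan": 100 tags the VM traffic with that VLAN ID.
  config: '{
    "cniVersion": "0.3.1",
    "type": "bridge",
    "bridge": "harvester-br0",
    "promiscMode": true,
    "vlan": 100,
    "ipam": {}
  }'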

Enabling VLAN in the Harvester UI

Enable the VLAN by going to Setting > vlan and entering a valid default physical NIC name for the VLAN.

The first physical NIC name of each Harvester node always defaults to eth0. It is recommended to choose a separate NIC for the VLAN other than the one used for the management network (the one selected during the Harvester installation) for better network performance and isolation.

Note: Modifying the default VLAN network setting will not change the existing configured host networks.

  • (Optional) Users can always customize each node's VLAN network configuration by going to the HOST > Network tab.

  • A new VLAN network is created by going to the Advanced > Networks page and clicking the Create button.

  • The network is configured when the VM is created.

    • Only the first network interface card will be enabled by default. Users can choose either the management network or a VLAN network. Note: You will need to select the Install guest agent option in the Advanced Options tab to get the VLAN network IP address from the Harvester UI.

    • Users can choose to add one or multiple network interface cards. Additional network interface card configurations can be set via cloud-init network data, e.g.:

      version: 1
      config:
        - type: physical
          name: enp1s0   # the interface name varies depending on the OS image
          subnets:
            - type: dhcp
        - type: physical
          name: enp2s0
          subnets:
            - type: dhcp
- + \ No newline at end of file diff --git a/import-image/index.html b/import-image/index.html index 93aa0f02..44b84423 100644 --- a/import-image/index.html +++ b/import-image/index.html @@ -9,13 +9,13 @@ Import Images | The open-source hyperconverged infrastructure solution for a cloud-native world - +

Import Images

To import virtual machine images on the Images page, enter a URL that can be accessed from the cluster. Note: The image name will be auto-filled using the filename from the URL. You can always customize it when required.

Currently, we support qcow2, raw, and ISO images.

Note: Uploading images from the UI to the Harvester cluster is not supported yet. The feature request is being tracked in #570.

Description and labels are optional.
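For completeness, an image can also be created by applying a VirtualMachineImage resource directly instead of using the Images page. Treat the following only as a sketch: the API group and version shown are assumptions that have changed between Harvester releases, and the image name and URL are placeholders:

apiVersion: harvesterhci.io/v1beta1    # assumed; verify against your Harvester release
kind: VirtualMachineImage
metadata:
  name: focal-server-cloudimg
  namespace: default
spec:
  displayName: focal-server-cloudimg   # in the UI this is auto-filled from the URL filename
  url: https://cloud-images.ubuntu.com/focal/current/focal-server-cloudimg-amd64.img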

- + \ No newline at end of file diff --git a/index.html b/index.html index 259aaa97..6f6ab782 100644 --- a/index.html +++ b/index.html @@ -9,13 +9,13 @@ Harvester - Open-source hyperconverged infrastructure - +

The open-source hyperconverged infrastructure solution for a cloud-native world

Great for

Running Kubernetes in VMs on top of Harvester
Running containerized workloads on bare metal servers
Transitioning and modernizing workloads to cloud-native
The edge

What is Harvester?

Harvester is a modern hyperconverged infrastructure (HCI) solution built for bare metal servers using enterprise-grade open-source technologies including Linux, KVM, Kubernetes, KubeVirt, and Longhorn. Designed for users looking for a flexible and affordable solution to run cloud-native and virtual machine (VM) workloads in their datacenters and at the edge, Harvester provides a single pane of glass for virtualization and cloud-native workload management.


Why Harvester?

Sits on the shoulders of cloud native giants

Harvester uses proven and mature open source software (OSS) components to build virtualization instead of proprietary kernels that are kept hidden from view.

Lower Total Cost of Ownership (TCO)

As 100% open source, Harvester is free from the costly license fees of other HCI solutions. Plus, its foundation is based on existing technology such as Linux and kernel-based virtual machines.

Integrate and prepare for the future

Built with cloud native components at its core, Harvester is future-proof as the infrastructure industry shifts toward containers, edge and multi-cloud software engineering.


How it Works

Harvester Architecture Diagram

Get Started

Want to try Harvester?

Simply install it directly onto your bare metal server to get started.

Download Now
- + \ No newline at end of file diff --git a/installation/harvester-configuration/index.html b/installation/harvester-configuration/index.html index e419e48d..78f9da3a 100644 --- a/installation/harvester-configuration/index.html +++ b/installation/harvester-configuration/index.html @@ -9,7 +9,7 @@ Harvester Configuration | The open-source hyperconverged infrastructure solution for a cloud-native world - + @@ -24,7 +24,7 @@ form is to just change your password on a Linux system and copy the value of the second field from /etc/shadow. You can also encrypt a password using openssl passwd -1.

Example

os:
  password: "$1$tYtghCfK$QHa51MS6MVAcfUKuOzNKt0"

Or clear text

os:
  password: supersecure
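To produce a hash in the encrypted form shown above, run the openssl command mentioned earlier; the salt is generated randomly, so your output will differ from the example hash:

openssl passwd -1 supersecure    # prints an MD5-crypt hash starting with $1$ for use as os.password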

os.environment

Environment variables to be set for k3s and other processes, such as the boot process. The primary use of this field is to set an HTTP proxy.

Example

os:
  environment:
    http_proxy: http://myserver
    https_proxy: http://myserver

install.mode

Harvester installer mode:

  • create: Create a new Harvester cluster.
  • join: Join an existing Harvester cluster. The server_url must be specified.

Example

install:
  mode: create

install.mgmtInterface

The interface that is used to build the VM fabric network.

Example

install:
  mgmtInterface: eth0

install.force_efi

Force EFI installation even when EFI is not detected. Default: false.
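Example (a boolean toggle; the value below simply turns the option on):

install:
  force_efi: true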

install.device

The device to install the OS to.

install.silent

Reserved.

install.iso_url

The ISO to download and install from when booting from kernel/vmlinuz rather than from the ISO itself.

install.poweroff

Shut down the machine after installation instead of rebooting.

install.no_format

Do not partition and format; assume the disk layout already exists.

install.debug

Run the installation with more logging and enable debug mode on the installed system.
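The remaining installer options above follow the same YAML layout; here is a combined sketch with illustrative values (the device path and ISO URL are placeholders, not recommendations):

install:
  device: /dev/sda
  iso_url: http://myserver/harvester-amd64.iso
  poweroff: true
  no_format: false
  debug: true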

install.tty

The tty device used for the console.

Example

install:
  tty: ttyS0,115200n8
- + \ No newline at end of file diff --git a/installation/iso-install/index.html b/installation/iso-install/index.html index d19a2961..d5c4b296 100644 --- a/installation/iso-install/index.html +++ b/installation/iso-install/index.html @@ -9,7 +9,7 @@ ISO Installation | The open-source hyperconverged infrastructure solution for a cloud-native world - + @@ -19,7 +19,7 @@ iso-installed.png
  • Configure the cluster token. This token will be used for adding other nodes to the cluster.
  • Configure the login password of the host. The default ssh user is rancher.
  • (Optional) You can choose to import SSH keys from a remote server URL. Your GitHub public keys can be used with https://github.com/<username>.keys.
  • (Optional) If you need to use an HTTP proxy to access the outside world, enter the proxy URL address here, otherwise, leave this blank.
  • (Optional) If you need to customize the host with cloud-init config, enter the HTTP URL here.
  • Confirm the installation options, and Harvester will be installed to your host. The installation may take a few minutes to complete.
  • Once the installation is complete, the host will restart, and a console UI with the management URL and status will be displayed. (You can use F12 to switch between the Harvester console and the shell.)
  • The default URL of the web interface is https://your-host-ip:30443. iso-installed.png
  • Users will be prompted to set the password for the default admin user on first-time login. first-login.png
  • - + \ No newline at end of file diff --git a/installation/pxe-boot-install/index.html b/installation/pxe-boot-install/index.html index 79010f66..a04c23fa 100644 --- a/installation/pxe-boot-install/index.html +++ b/installation/pxe-boot-install/index.html @@ -9,13 +9,13 @@ PXE Boot Install | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    PXE Boot Install

    Starting from version 0.2.0, Harvester can be installed automatically and at scale. This document provides an example of automatic installation using PXE boot.

    We recommend using iPXE to perform the network boot. It has more features than the traditional PXE boot program and is likely available on modern NICs. If a NIC does not ship with iPXE firmware, an iPXE firmware image can be loaded from the TFTP server first.

    To see sample iPXE scripts, please visit https://github.com/harvester/ipxe-examples.

    Preparing HTTP Servers

    An HTTP server is required to serve boot files. Please ensure this server is set up correctly before continuing.

    Let's assume an NGINX HTTP server's IP is 10.100.0.10, and it serves the /usr/share/nginx/html/ folder at the path http://10.100.0.10/.

    Preparing Boot Files

    • Download the required files from https://github.com/harvester/harvester/releases. Choose an appropriate version.

      • The ISO: harvester-amd64.iso
      • The kernel: harvester-vmlinuz-amd64
      • The initrd: harvester-initrd-amd64
    • Serve the files.

      Copy or move the downloaded files to an appropriate location so they can be downloaded via the HTTP server. e.g.,

      sudo mkdir -p /usr/share/nginx/html/harvester/
      sudo cp /path/to/harvester-amd64.iso /usr/share/nginx/html/harvester/
      sudo cp /path/to/harvester-vmlinuz-amd64 /usr/share/nginx/html/harvester/
      sudo cp /path/to/harvester-initrd-amd64 /usr/share/nginx/html/harvester/
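
      To verify that the files are reachable over HTTP, a quick check such as the following can be used (adjust the IP and path to your server):

      curl -I http://10.100.0.10/harvester/harvester-vmlinuz-amd64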

    Preparing iPXE boot scripts

    When performing automatic installation, there are two modes:

    • CREATE: we are installing a node to construct an initial Harvester cluster.
    • JOIN: we are installing a node to join an existing Harvester cluster.

    Prerequisite

    Nodes need to have at least 8G of RAM because the full ISO file is loaded into tmpfs during the installation.

    CREATE mode

    ⚠️ Security Risks: The configuration file below contains credentials that should be kept secret. Do not make the configuration file publicly accessible.

    Create a Harvester configuration file config-create.yaml for CREATE mode. Modify the values as needed:

    # cat /usr/share/nginx/html/harvester/config-create.yaml
    token: token
    os:
      hostname: node1
      ssh_authorized_keys:
        - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDbeUa9A7Kee+hcCleIXYxuaPksn2m4PZTd4T7wPcse8KbsQfttGRax6vxQXoPO6ehddqOb2nV7tkW2mEhR50OE7W7ngDHbzK2OneAyONYF44bmMsapNAGvnsBKe9rNrev1iVBwOjtmyVLhnLrJIX+2+3T3yauxdu+pmBsnD5OIKUrBrN1sdwW0rA2rHDiSnzXHNQM3m02aY6mlagdQ/Ovh96h05QFCHYxBc6oE/mIeFRaNifa4GU/oELn3a6HfbETeBQz+XOEN+IrLpnZO9riGyzsZroB/Y3Ju+cJxH06U0B7xwJCRmWZjuvfFQUP7RIJD1gRGZzmf3h8+F+oidkO2i5rbT57NaYSqkdVvR6RidVLWEzURZIGbtHjSPCi4kqD05ua8r/7CC0PvxQb1O5ILEdyJr2ZmzhF6VjjgmyrmSmt/yRq8MQtGQxyKXZhJqlPYho4d5SrHi5iGT2PvgDQaWch0I3ndEicaaPDZJHWBxVsCVAe44Wtj9g3LzXkyu3k= root@admin
      password: rancher
    install:
      mode: create
      mgmt_interface: eth0
      device: /dev/sda
      iso_url: http://10.100.0.10/harvester/harvester-amd64.iso

    For machines that need to be installed in CREATE mode, the following is an iPXE script that boots the kernel with the above config:

    #!ipxe
    kernel vmlinuz k3os.mode=install console=ttyS0 console=tty1 harvester.install.automatic=true harvester.install.config_url=http://10.100.0.10/harvester/config-create.yaml
    initrd initrd
    boot

    Let's assume the iPXE script is stored in /usr/share/nginx/html/harvester/ipxe-create

    JOIN mode

    ⚠️ Security Risks: The configuration file below contains credentials that should be kept secret. Do not make the configuration file publicly accessible.

    Create a Harvester configuration file config-join.yaml for JOIN mode. Modify the values as needed:

    # cat /usr/share/nginx/html/harvester/config-join.yaml
    server_url: https://10.100.0.130:6443
    token: token
    os:
      hostname: node2
      ssh_authorized_keys:
        - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDbeUa9A7Kee+hcCleIXYxuaPksn2m4PZTd4T7wPcse8KbsQfttGRax6vxQXoPO6ehddqOb2nV7tkW2mEhR50OE7W7ngDHbzK2OneAyONYF44bmMsapNAGvnsBKe9rNrev1iVBwOjtmyVLhnLrJIX+2+3T3yauxdu+pmBsnD5OIKUrBrN1sdwW0rA2rHDiSnzXHNQM3m02aY6mlagdQ/Ovh96h05QFCHYxBc6oE/mIeFRaNifa4GU/oELn3a6HfbETeBQz+XOEN+IrLpnZO9riGyzsZroB/Y3Ju+cJxH06U0B7xwJCRmWZjuvfFQUP7RIJD1gRGZzmf3h8+F+oidkO2i5rbT57NaYSqkdVvR6RidVLWEzURZIGbtHjSPCi4kqD05ua8r/7CC0PvxQb1O5ILEdyJr2ZmzhF6VjjgmyrmSmt/yRq8MQtGQxyKXZhJqlPYho4d5SrHi5iGT2PvgDQaWch0I3ndEicaaPDZJHWBxVsCVAe44Wtj9g3LzXkyu3k= root@admin
      dns_nameservers:
        - 1.1.1.1
        - 8.8.8.8
      password: rancher
    install:
      mode: join
      mgmt_interface: eth0
      device: /dev/sda
      iso_url: http://10.100.0.10/harvester/harvester-amd64.iso

    Note that the mode is join and the server_url needs to be provided.

    For machines that need to be installed in JOIN mode, the following is an iPXE script that boots the kernel with the above config:

    #!ipxe
    kernel vmlinuz k3os.mode=install console=ttyS0 console=tty1 harvester.install.automatic=true harvester.install.config_url=http://10.100.0.10/harvester/config-join.yaml
    initrd initrd
    boot

    Let's assume the iPXE script is stored in /usr/share/nginx/html/harvester/ipxe-join.

    TROUBLESHOOTING

    • Sometimes the installer might not be able to fetch the Harvester configuration file because the network stack is not ready yet. To work around this, please add a boot_cmd parameter to the iPXE script, e.g.,

      #!ipxe
      kernel vmlinuz k3os.mode=install console=ttyS0 console=tty1 harvester.install.automatic=true harvester.install.config_url=http://10.100.0.10/harvester/config-join.yaml boot_cmd="echo include_ping_test=yes >> /etc/conf.d/net-online"
      initrd initrd
      boot

    DHCP server configuration

    Here is an example to configure the ISC DHCP server to offer iPXE scripts:

    option architecture-type code 93 = unsigned integer 16;

    subnet 10.100.0.0 netmask 255.255.255.0 {
      option routers 10.100.0.10;
      option domain-name-servers 192.168.2.1;
      range 10.100.0.100 10.100.0.253;
    }

    group {
      # create group
      if exists user-class and option user-class = "iPXE" {
        # iPXE Boot
        if option architecture-type = 00:07 {
          filename "http://10.100.0.10/harvester/ipxe-create-efi";
        } else {
          filename "http://10.100.0.10/harvester/ipxe-create";
        }
      } else {
        # PXE Boot
        if option architecture-type = 00:07 {
          # UEFI
          filename "ipxe.efi";
        } else {
          # Non-UEFI
          filename "undionly.kpxe";
        }
      }

      host node1 { hardware ethernet 52:54:00:6b:13:e2; }
    }


    group {
      # join group
      if exists user-class and option user-class = "iPXE" {
        # iPXE Boot
        if option architecture-type = 00:07 {
          filename "http://10.100.0.10/harvester/ipxe-join-efi";
        } else {
          filename "http://10.100.0.10/harvester/ipxe-join";
        }
      } else {
        # PXE Boot
        if option architecture-type = 00:07 {
          # UEFI
          filename "ipxe.efi";
        } else {
          # Non-UEFI
          filename "undionly.kpxe";
        }
      }

      host node2 { hardware ethernet 52:54:00:69:d5:92; }
    }

    The config file declares a subnet and two groups. The first group is for hosts that boot in CREATE mode and the other is for JOIN mode. By default, the iPXE path is chosen, but if the server sees a plain PXE client, it offers an iPXE image that matches the client architecture instead. Please prepare those images and a TFTP server first.
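
    For example, the two boot images can be fetched from boot.ipxe.org and placed in the TFTP root. The /srv/tftpboot path below is only an assumption; use whatever root directory your TFTP server is configured to serve:

    sudo mkdir -p /srv/tftpboot
    cd /srv/tftpboot
    sudo wget http://boot.ipxe.org/undionly.kpxe   # for legacy (non-UEFI) PXE clients
    sudo wget http://boot.ipxe.org/ipxe.efi        # for UEFI PXE clients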

    Harvester configuration

    For more information about Harvester configuration, please refer to the Harvester configuration.

    Users can also provide configuration via kernel parameters. For example, to specify the CREATE install mode, the user can pass the harvester.install.mode=create kernel parameter when booting. Values passed through kernel parameters have higher priority than values specified in the config file.
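
    For example, an iPXE script that sets the install mode through a kernel parameter might look like the following sketch, modeled on the scripts above; the config file referenced by harvester.install.config_url still supplies the remaining values:

    #!ipxe
    kernel vmlinuz k3os.mode=install console=ttyS0 console=tty1 harvester.install.automatic=true harvester.install.mode=create harvester.install.config_url=http://10.100.0.10/harvester/config-create.yaml
    initrd initrd
    boot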

    UEFI HTTP Boot support

    UEFI firmware supports loading a boot image from an HTTP server. This section demonstrates how to use UEFI HTTP boot to load the iPXE program and perform an automatic installation.

    Serve the iPXE program

    Download the iPXE UEFI program from http://boot.ipxe.org/ipxe.efi and make sure ipxe.efi can be downloaded from your HTTP server, e.g.:

    cd /usr/share/nginx/html/harvester/
    wget http://boot.ipxe.org/ipxe.efi

    The file can now be downloaded from http://10.100.0.10/harvester/ipxe.efi.

    DHCP server configuration

    If the user plans to use the UEFI HTTP boot feature by getting a dynamic IP first, the DHCP server needs to provide the iPXE program URL when it sees such a request. Here is an updated ISC DHCP server group example:

    group {
      # create group
      if exists user-class and option user-class = "iPXE" {
        # iPXE Boot
        if option architecture-type = 00:07 {
          filename "http://10.100.0.10/harvester/ipxe-create-efi";
        } else {
          filename "http://10.100.0.10/harvester/ipxe-create";
        }
      } elsif substring (option vendor-class-identifier, 0, 10) = "HTTPClient" {
        # UEFI HTTP Boot
        option vendor-class-identifier "HTTPClient";
        filename "http://10.100.0.10/harvester/ipxe.efi";
      } else {
        # PXE Boot
        if option architecture-type = 00:07 {
          # UEFI
          filename "ipxe.efi";
        } else {
          # Non-UEFI
          filename "undionly.kpxe";
        }
      }

      host node1 { hardware ethernet 52:54:00:6b:13:e2; }
    }

    The elsif substring statement is new, and it offers http://10.100.0.10/harvester/ipxe.efi when it sees a UEFI HTTP boot DHCP request. After the client fetches the iPXE program and runs it, the iPXE program sends a DHCP request again and loads the iPXE script from the URL http://10.100.0.10/harvester/ipxe-create-efi.

    The iPXE script for UEFI boot

    It's mandatory to specify the initrd image for UEFI boot in the kernel parameters. Here is an updated version of the iPXE script for CREATE mode.

    #!ipxe
    kernel vmlinuz initrd=initrd k3os.mode=install console=ttyS0 console=tty1 harvester.install.automatic=true harvester.install.config_url=http://10.100.0.10/harvester/config-create.yaml
    initrd initrd
    boot

    The parameter initrd=initrd is required for initrd to be chrooted.


    Best Practices for Optimizing Longhorn Disk Performance

    · 2 min read
    David Ko
    Jillian Maroket

    The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

    • SATA/NVMe SSDs or disk drives with similar performance
    • 10 Gbps network bandwidth between nodes
    • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

    The following sections outline other recommendations for achieving optimal disk performance.

    IO Performance

    • Storage network: Use a dedicated storage network to improve IO performance and stability.

    • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

    • Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage or less impact on system performance. This practice is especially beneficial to data-intensive applications.

    • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags (see the StorageClass sketch after this list).

    • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes.

      For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

      For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.
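
    The following is a sketch of a Longhorn StorageClass that combines several of the recommendations above (two replicas, best-effort data locality, and a disk tag). The ssd tag is only an assumed example; use the tags that actually exist in your cluster.

      # Illustrative StorageClass; adjust names and tags to your environment
      apiVersion: storage.k8s.io/v1
      kind: StorageClass
      metadata:
        name: longhorn-fast
      provisioner: driver.longhorn.io
      parameters:
        numberOfReplicas: "2"
        dataLocality: "best-effort"
        diskSelector: "ssd"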

    Space Efficiency

    • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

      For applications with replication capability, periodically delete all types of snapshots.

    Disaster Recovery

    • Recurring backups: Create recurring backup jobs for mission-critical application volumes.

    • System backup: Run periodic system backups.


    Calculation of Resource Metrics in Harvester

    · 3 min read
    Jian Wang

    Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

    # kubectl get nodes -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: v1
    kind: Node
    metadata:
    ..
    management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
    node.alpha.kubernetes.io/ttl: "0"
    ..
    name: harv41
    resourceVersion: "2170215"
    uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
    spec:
    podCIDR: 10.52.0.0/24
    podCIDRs:
    - 10.52.0.0/24
    providerID: rke2://harv41
    status:
    addresses:
    - address: 192.168.122.141
    type: InternalIP
    - address: harv41
    type: Hostname
    allocatable:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: "149527126718"
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"
    capacity:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: 153707984Ki
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

    # kubectl get NodeMetrics -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: metrics.k8s.io/v1beta1
    kind: NodeMetrics
    metadata:
    ...
    name: harv41
    timestamp: "2024-01-23T12:04:44Z"
    usage:
    cpu: 891736742n
    memory: 9845008Ki
    window: 10.149s
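
    The same usage data can also be viewed in a human-readable form with kubectl top, which reads from the same metrics API:

      kubectl top nodes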

    Resource Reservation

    Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the NodeMetrics object.

      management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
      management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'
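
    These annotations can be read directly from the Node object shown earlier, for example:

      kubectl get node harv41 -o yaml | grep 'management.cattle.io/pod-'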

    For more information, see Requests and Limits in the Kubernetes documentation.

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

    note

    Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources.

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

    apiVersion: v1
    items:
    - apiVersion: longhorn.io/v1beta2
    kind: Node
    metadata:
    ..
    name: harv41
    namespace: longhorn-system
    ..
    spec:
    allowScheduling: true
    disks:
    default-disk-ef11a18c36b01132:
    allowScheduling: true
    diskType: filesystem
    evictionRequested: false
    path: /var/lib/harvester/defaultdisk
    storageReserved: 24220101427
    tags: []
    ..
    status:
    ..
    diskStatus:
    default-disk-ef11a18c36b01132:
    ..
    diskType: filesystem
    diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
    scheduledReplica:
    pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
    pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
    pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
    pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
    pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
    storageAvailable: 77699481600
    storageMaximum: 80733671424
    storageScheduled: 45097156608
    region: ""
    snapshotCheckStatus: {}
    zone: ""

    Configure PriorityClass on Longhorn System Components

    · 7 min read
    Kiefer Chang

    Harvester v1.2.0 introduces a new enhancement where Longhorn system-managed components in newly-deployed clusters are automatically assigned a system-cluster-critical priority class by default. However, when upgrading your Harvester clusters from previous versions, you may notice that Longhorn system-managed components do not have any priority class set.

    This behavior is intentional and aimed at supporting zero-downtime upgrades. Longhorn does not allow changing the priority-class setting when attached volumes exist. For more details, please refer to Setting Priority Class During Longhorn Installation.

    This article explains how to manually configure priority classes for Longhorn system-managed components after upgrading your Harvester cluster, ensuring that your Longhorn components have the appropriate priority class assigned and maintaining the stability and performance of your system.
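
    Before proceeding, you can optionally check the current value of the Longhorn priority-class setting (the same setting that is patched later in this article):

      kubectl get -n longhorn-system settings.longhorn.io priority-class -o yaml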

    Stop all virtual machines

    Stop all virtual machines (VMs) to detach all volumes. Please back up any work before doing this.

    1. Login to a Harvester controller node and become root.

    2. Get all running VMs and write down their namespaces and names:

      kubectl get vmi -A

      Alternatively, you can get this information by backing up the Virtual Machine Instance (VMI) manifests with the following command:

      kubectl get vmi -A -o json > vmi-backup.json
    3. Shut down all VMs. Log in to all running VMs and shut them down gracefully (recommended). Or use the following command to send shutdown signals to all VMs:

      kubectl get vmi -A -o json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Stop ${namespace}/${name}"
        virtctl stop $name -n $namespace
      done
      note

      You can also stop all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Stop.
    4. Ensure there are no running VMs:

      Run the command:

      kubectl get vmi -A

      The above command must return:

      No resources found

    Scale down monitoring pods

    1. Scale down the Prometheus deployment. Run the following command and wait for all Prometheus pods to terminate:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like this:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      statefulset rolling update complete 0 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to terminate:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      statefulset rolling update complete 0 pods at revision alertmanager-rancher-monitoring-alertmanager-c8c459dff...
    3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to terminate:

      kubectl scale --replicas=0 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      deployment "rancher-monitoring-grafana" successfully rolled out

    Scale down vm-import-controller pods

    1. Check if the vm-import-controller addon is enabled and configured with a persistent volume with the following command:

      kubectl get pvc -n harvester-system harvester-vm-import-controller

      If the above command returns an output like this, you must scale down the vm-import-controller pod. Otherwise, you can skip the following step.

      NAME                             STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS         AGE
      harvester-vm-import-controller Bound pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559 200Gi RWO harvester-longhorn 2m53s
    2. Scale down the vm-import-controller pods with the following command:

      kubectl scale --replicas=0 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      deployment "harvester-vm-import-controller" successfully rolled out

    Set the priority-class setting

    1. Before applying the priority-class setting, you need to verify all volumes are detached. Run the following command to verify the STATE of each volume is detached:

      kubectl get volumes.longhorn.io -A

      Verify the output looks like this:

      NAMESPACE         NAME                                       STATE      ROBUSTNESS   SCHEDULED   SIZE           NODE   AGE
      longhorn-system pvc-5743fd02-17a3-4403-b0d3-0e9b401cceed detached unknown 5368709120 15d
      longhorn-system pvc-7e389fe8-984c-4049-9ba8-5b797cb17278 detached unknown 53687091200 15d
      longhorn-system pvc-8df64e54-ecdb-4d4e-8bab-28d81e316b8b detached unknown 2147483648 15d
      longhorn-system pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559 detached unknown 214748364800 11m
    2. Set the priority-class setting with the following command:

      kubectl patch -n longhorn-system settings.longhorn.io priority-class --patch '{"value": "system-cluster-critical"}' --type merge

      Longhorn system-managed pods will restart and then you need to check if all the system-managed components have a priority class set:

      Get the value of the priority class system-cluster-critical:

      kubectl get priorityclass system-cluster-critical

      Verify the output looks like this:

      NAME                      VALUE        GLOBAL-DEFAULT   AGE
      system-cluster-critical 2000000000 false 15d
    3. Use the following command to get pods' priority in the longhorn-system namespace:

      kubectl get pods -n longhorn-system -o custom-columns="Name":metadata.name,"Priority":.spec.priority
    4. Verify all system-managed components' pods have the correct priority. System-managed components include:

      • csi-attacher
      • csi-provisioner
      • csi-resizer
      • csi-snapshotter
      • engine-image-ei
      • instance-manager-e
      • instance-manager-r
      • longhorn-csi-plugin

    Scale up vm-import-controller pods

    If you scale down the vm-import-controller pods, you must scale it up again.

    1. Scale up the vm-import-controller pod. Run the command:

      kubectl scale --replicas=1 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      Waiting for deployment "harvester-vm-import-controller" rollout to finish: 0 of 1 updated replicas are available...
      deployment "harvester-vm-import-controller" successfully rolled out
    2. Verify vm-import-controller is running using the following command:

      kubectl get pods --selector app.kubernetes.io/instance=vm-import-controller -A

      A sample output looks like this, the pod's STATUS must be Running:

      NAMESPACE          NAME                                              READY   STATUS    RESTARTS   AGE
      harvester-system harvester-vm-import-controller-6bd8f44f55-m9k86 1/1 Running 0 4m53s

    Scale up monitoring pods

    1. Scale up the Prometheus deployment. Run the following command and wait for all Prometheus pods to roll out:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale up the AlertManager deployment. Run the following command and wait for all AlertManager pods to roll out:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision alertmanager-rancher-monitoring-alertmanager-c8bd4466c...
    3. Scale up the Grafana deployment. Run the following command and wait for all Grafana pods to roll out:

      kubectl scale --replicas=1 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      Waiting for deployment "rancher-monitoring-grafana" rollout to finish: 0 of 1 updated replicas are available...
      deployment "rancher-monitoring-grafana" successfully rolled out

    Start virtual machines

    1. Start a VM with the command:

      virtctl start $name -n $namespace

      Replace $name with the VM's name and $namespace with the VM's namespace. You can list all virtual machines with the command:

      kubectl get vms -A
      note

      You can also start all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Start.

      Alternatively, you can start all previously running VMs from the backup manifest with the following command:

      cat vmi-backup.json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Start ${namespace}/${name}"
        virtctl start $name -n $namespace || true
      done

    Evicting Replicas From a Disk (the CLI way)

    · 2 min read
    Kiefer Chang

    Harvester replicates volume data across disks in a cluster. Before removing a disk, the user needs to evict replicas on the disk to other disks to preserve the volumes' configured availability. For more information about eviction in Longhorn, please check Evicting Replicas on Disabled Disks or Nodes.

    Preparation

    This document describes how to evict Longhorn disks using the kubectl command. Before that, users must ensure the environment is set up correctly. There are two recommended ways to do this:

    1. Log in to any management node and switch to root (sudo -i).
    2. Download the Kubeconfig file and use it locally:
      • Install the kubectl and yq programs manually.
      • Open the Harvester GUI, click support at the bottom left of the page, and click Download KubeConfig to download the Kubeconfig file.
      • Set the Kubeconfig file's path in the KUBECONFIG environment variable, for example, export KUBECONFIG=/path/to/kubeconfig.

    Evicting replicas from a disk

    1. List Longhorn nodes (names are identical to Kubernetes nodes):

      kubectl get -n longhorn-system nodes.longhorn.io

      Sample output:

      NAME    READY   ALLOWSCHEDULING   SCHEDULABLE   AGE
      node1 True true True 24d
      node2 True true True 24d
      node3 True true True 24d
    2. List disks on a node. Assume we want to evict replicas of a disk on node1:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.spec.disks'

      Sample output:

      default-disk-ed7af10f5b8356be:
        allowScheduling: true
        evictionRequested: false
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    3. Assume disk default-disk-ed7af10f5b8356be is the target we want to evict replicas out of.

      Edit the node:

      kubectl edit -n longhorn-system nodes.longhorn.io node1 

      Update these two fields and save:

      • spec.disks.<disk_name>.allowScheduling to false
      • spec.disks.<disk_name>.evictionRequested to true

      Sample editing:

      default-disk-ed7af10f5b8356be:
        allowScheduling: false
        evictionRequested: true
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    4. Wait for all replicas on the disk to be evicted.

      Get current scheduled replicas on the disk:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica'

      Sample output:

      pvc-86d3d212-d674-4c64-b69b-4a2eb1df2272-r-7b422db7: 5368709120
      pvc-b06f0b09-f30c-4936-8a2a-425b993dd6cb-r-bb0fa6b3: 2147483648
      pvc-b844bcc6-3b06-4367-a136-3909251cb560-r-08d1ab3c: 53687091200
      pvc-ea6e0dff-f446-4a38-916a-b3bea522f51c-r-193ca5c6: 10737418240

      Run the command repeatedly (or use the polling sketch after this list), and the output should eventually become an empty map:

      {}

      This means Longhorn has evicted all replicas on the disk to other disks.

      note

      If a replica stays on the disk indefinitely, please open the Longhorn GUI and check whether there is free space on other disks.

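
    A minimal polling sketch for the previous step, assuming the same node and disk names as in the examples above (adjust them to your environment):

      while true; do
        left=$(kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica')
        echo "$left"
        if [ "$left" = "{}" ]; then
          break
        fi
        sleep 10
      done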
    Harvester HCI knowledge base

    Calculation of Resource Metrics in Harvester

    · 3 min read
    Jian Wang

    Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

    # kubectl get nodes -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: v1
    kind: Node
    metadata:
    ..
    management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
    node.alpha.kubernetes.io/ttl: "0"
    ..
    name: harv41
    resourceVersion: "2170215"
    uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
    spec:
    podCIDR: 10.52.0.0/24
    podCIDRs:
    - 10.52.0.0/24
    providerID: rke2://harv41
    status:
    addresses:
    - address: 192.168.122.141
    type: InternalIP
    - address: harv41
    type: Hostname
    allocatable:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: "149527126718"
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"
    capacity:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: 153707984Ki
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

    # kubectl get NodeMetrics -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: metrics.k8s.io/v1beta1
    kind: NodeMetrics
    metadata:
    ...
    name: harv41
    timestamp: "2024-01-23T12:04:44Z"
    usage:
    cpu: 891736742n
    memory: 9845008Ki
    window: 10.149s

    Resource Reservation

    Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the NodeMetrics object.

          management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

    For more information, see Requests and Limits in the Kubernetes documentation.

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

    note

    Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources.

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

    apiVersion: v1
    items:
    - apiVersion: longhorn.io/v1beta2
      kind: Node
      metadata:
        ..
        name: harv41
        namespace: longhorn-system
        ..
      spec:
        allowScheduling: true
        disks:
          default-disk-ef11a18c36b01132:
            allowScheduling: true
            diskType: filesystem
            evictionRequested: false
            path: /var/lib/harvester/defaultdisk
            storageReserved: 24220101427
            tags: []
        ..
      status:
        ..
        diskStatus:
          default-disk-ef11a18c36b01132:
            ..
            diskType: filesystem
            diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
            scheduledReplica:
              pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
              pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
              pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
              pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
              pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
            storageAvailable: 77699481600
            storageMaximum: 80733671424
            storageScheduled: 45097156608
        region: ""
        snapshotCheckStatus: {}
        zone: ""
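
    As a worked example using the values above: total capacity is storageMaximum = 80733671424 bytes, the Used field is storageMaximum - storageAvailable = 80733671424 - 77699481600 = 3034189824 bytes, and the Reserved field is storageReserved = 24220101427 bytes (roughly 30% of storageMaximum, matching the default reserved percentage). If jq is available on your workstation, the following sketch computes the same figures for one node (harv41 here) directly from the Longhorn Node CR:

    NODE=harv41
    kubectl -n longhorn-system get nodes.longhorn.io "$NODE" -o json | jq -r '
      ([.status.diskStatus[].storageMaximum]   | add) as $total
      | ([.status.diskStatus[].storageAvailable] | add) as $available
      | ([.spec.disks[].storageReserved]         | add) as $reserved
      | "Total: \($total)  Used: \($total - $available)  Reserved: \($reserved)"'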

    · 2 min read
    David Ko
    Jillian Maroket

    The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

    • SATA/NVMe SSDs or disk drives with similar performance
    • 10 Gbps network bandwidth between nodes
    • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

    The following sections outline other recommendations for achieving optimal disk performance.

    IO Performance

    • Storage network: Use a dedicated storage network to improve IO performance and stability.

    • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

    • Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage or less impact on system performance. This practice is especially beneficial to data-intensive applications.

    • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags.

    • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes (see the example StorageClass sketch after this list).

      For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

      For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.
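
    To make the replica count, storage tag, and data locality recommendations above concrete, the following is a minimal sketch of a Longhorn StorageClass. The class name longhorn-ssd-2r and the ssd disk tag are illustrative; check the parameter names against the Longhorn documentation for your version before applying it.

    cat <<EOF | kubectl apply -f -
    apiVersion: storage.k8s.io/v1
    kind: StorageClass
    metadata:
      name: longhorn-ssd-2r
    provisioner: driver.longhorn.io
    allowVolumeExpansion: true
    parameters:
      numberOfReplicas: "2"          # recommended default of two replicas
      dataLocality: "best-effort"    # recommended default data locality
      diskSelector: "ssd"            # schedule replicas only on disks tagged "ssd"
      staleReplicaTimeout: "30"
    EOF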

    Space Efficiency

    • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

      For applications with replication capability, periodically delete all types of snapshots.

    Disaster Recovery

    • Recurring backups: Create recurring backup jobs for mission-critical application volumes.

    • System backup: Run periodic system backups.

    · 11 min read
    Jian Wang

    In Harvester, VM live migration is well supported in the UI. Refer to Harvester VM Live Migration for more details.

    The VM live migration process finishes smoothly in most cases. However, the migration may sometimes get stuck and not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

    A large part of the following content is copied from the KubeVirt document https://kubevirt.io/user-guide/operations/live_migration/; some content and formatting has been adjusted to fit this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

    After you select Apply, a VirtualMachineInstanceMigration CRD object is created, and the related controller/operator starts the process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachineInstanceMigration
    metadata:
      name: migration-job
    spec:
      vmiName: new-vm
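
    Assuming the manifest above is saved as migration-job.yaml and the VMI runs in the current namespace, you can apply it and then watch the migration object (vmim is the short name commonly registered for VirtualMachineInstanceMigration):

    kubectl apply -f migration-job.yaml
    kubectl get vmim migration-job -o yaml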

    Under the hood, open source projects such as KubeVirt, Libvirt, and QEMU perform most of the VM live migration work. See the References section below.

    Migration Status Reporting

    When a virtual machine instance (VMI) starts, it is also calculated whether the machine is live migratable. The result is stored in VMI.status.conditions. The calculation can be based on multiple parameters of the VMI; however, at the moment, it is largely based on the access mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-live-migratable VMI will be rejected.

    The reported migration method is also calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

    Status:
      Conditions:
        Status: True
        Type: LiveMigratable
      Migration Method: BlockMigration
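
    You can query both values directly from the VMI status; for example, assuming the VMI is named new-vm:

    kubectl get vmi new-vm -o jsonpath='{.status.conditions[?(@.type=="LiveMigratable")].status}{"\n"}'
    kubectl get vmi new-vm -o jsonpath='{.status.migrationMethod}{"\n"}'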

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

    Migration State:
      Completed: true
      End Timestamp: 2019-03-29T03:37:52Z
      Migration Config:
        Completion Timeout Per GiB: 800
        Progress Timeout: 150
      Migration UID: c64d4898-51d3-11e9-b370-525500d15501
      Source Node: node02
      Start Timestamp: 2019-03-29T04:02:47Z
      Target Direct Migration Node Ports:
        35001: 0
        41068: 49152
        38284: 49153
      Target Node: node01
      Target Node Address: 10.128.0.46
      Target Node Domain Detected: true
      Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq
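
    To follow the outcome from the command line, you can poll the corresponding status fields (again assuming a VMI named new-vm; the output is empty until the fields are set):

    kubectl get vmi new-vm -o jsonpath='{.status.migrationState.completed}{"\n"}'
    kubectl get vmi new-vm -o jsonpath='{.status.migrationState.failed}{"\n"}'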

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

    VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the case, however, the migration might get stuck and be unable to progress.

    The main factor that affects migrations from the guest perspective is its dirty rate, that is, the rate at which the VM dirties memory. Guests with a high dirty rate lead to a race during migration: on the one hand, memory is transferred continuously to the target, and on the other, the same memory is dirtied again by the guest. In such scenarios, consider using a more advanced migration strategy. Refer to Understanding different migration strategies for more details.

    There are three VM live migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

    It works as follows:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
    3. The guest starts executing on the target VM.
    4. The source VM is removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

    However, in some cases migrations might not converge easily; that is, by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (which is the VM the guest executes on). There are many reasons for migrations to fail to converge, such as a high dirty rate or low resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.

    VM Live Migration Strategy: Post-copy

    Post-copy migrations work as follows:

    1. The target VM is created.
    2. The guest starts running on the target VM.
    3. The source starts sending chunks of VM state (mostly memory) to the target.
    4. When the guest, running on the target VM, accesses memory:
       1. If the memory exists on the target VM, the guest can access it.
       2. Otherwise, the target VM asks for a chunk of memory from the source VM.
    5. Once all of the memory state is updated at the target VM, the source VM is removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

    • The same memory chunk is never transferred twice. This is possible because, with post-copy, it doesn't matter that a page has been dirtied, since the guest is already running on the target VM.
    • This means that a high dirty-rate has much less effect.
    • Consumes less network bandwidth.

    Disadvantages:

    • When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest's state, but some other parts of it may still be updated only at the source VM. This situation is generally dangerous because, for example, if either the target or source VM crashes, the state cannot be recovered.
    • Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to wait for a lot of memory to arrive in a short period of time.
    • Slower than pre-copy in most cases.
    • Harder to cancel a migration.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

    Since a high dirty rate is usually the most significant reason migrations do not converge, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or is throttled only negligibly. If the migration does not converge fast enough, the CPU is throttled more and more as time goes on.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

    Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process, non-locked pages and blocks are copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than these are being copied.

    Completion Time

    In some cases, the virtual machine can write to different memory pages/disk blocks at a higher rate than these can be copied, which prevents the migration process from completing in a reasonable amount of time. In this case, the live migration will be aborted if it runs for too long. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data for the migration to complete before aborting it. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.

    Progress Timeout

    A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

    KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, only 5 migrations can run in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.

    You can change these values in the kubevirt CR:

    apiVersion: kubevirt.io/v1
    kind: Kubevirt
    metadata:
      name: kubevirt
      namespace: kubevirt
    spec:
      configuration:
        migrations:
          parallelMigrationsPerCluster: 5
          parallelOutboundMigrationsPerNode: 2
          bandwidthPerMigration: 64Mi
          completionTimeoutPerGiB: 800
          progressTimeout: 150
          disableTLS: false
          nodeDrainTaintKey: "kubevirt.io/drain"
          allowAutoConverge: false          # related to: Auto-converge
          allowPostCopy: false              # related to: Post-copy
          unsafeMigrationOverride: false
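
    As a sketch of how such a change could be applied without editing the full CR, the following uses a merge patch against the object named in the example above (in Harvester the KubeVirt CR may live in a different namespace, so adjust the -n argument accordingly):

    kubectl -n kubevirt patch kubevirt kubevirt --type merge \
      -p '{"spec":{"configuration":{"migrations":{"parallelOutboundMigrationsPerNode":1}}}}'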

    Remember that most of these configurations can be overridden and fine-tuned to a specified group of VMs. For more information, please refer to the Migration Policies section below.

    Migration Policies

    Migration policies provide a new way of applying migration configurations to virtual machines. The policies can refine the KubeVirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that the migration policy can refine (that is, change, remove, or add).

    Remember that migration policies are currently at version v1alpha1. This means that the API is not fully stable yet and may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      allowAutoConverge: true
      bandwidthPerMigration: 217Ki
      completionTimeoutPerGiB: 23
      allowPostCopy: false

    All of the above fields are optional. When omitted, the configuration will be applied as defined in the KubeVirt CR's MigrationConfiguration. This way, the KubeVirt CR serves as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configuration.

    Matching Policies to VMs

    Next in the spec are the selectors that define the group of VMs to which the policy applies. The available options are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        namespaceSelector:
          hpc-workloads: true # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db # Matches a key and a value

    References

    Documents

    Libvirt Guest Migration

    Libvirt has a chapter that describes the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

    The VM Live Migration related configuration options are passed to each layer correspondingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

    ...
    import "libvirt.org/go/libvirt"

    ...

    func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
        ...
        if options.AllowAutoConverge {
            migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
        }
        if options.AllowPostCopy {
            migrateFlags |= libvirt.MIGRATE_POSTCOPY
        }
        ...
    }

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

    const (
        ...
        MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
        MIGRATE_RDMA_PIN_ALL  = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
        MIGRATE_POSTCOPY      = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
        ...
    )

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

    /* Enable algorithms that ensure a live migration will eventually converge.
     * This usually means the domain will be slowed down to make sure it does
     * not change its memory faster than a hypervisor can transfer the changed
     * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
     * parameters can be used to tune the algorithm.
     *
     * Since: 1.2.3
     */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
     * migration. However, the migration will start normally and
     * virDomainMigrateStartPostCopy needs to be called to switch it into the
     * post-copy mode. See virDomainMigrateStartPostCopy for more details.
     *
     * Since: 1.3.3
     */
    VIR_MIGRATE_POSTCOPY = (1 << 15),

    · 4 min read
    Hang Yu

    Starting with v1.2.0, Harvester offers the capability to install a Container Storage Interface (CSI) driver in your Harvester cluster. This allows you to leverage external storage for the virtual machine's non-system data disk, giving you the flexibility to use different drivers tailored for specific needs, whether it's for performance optimization or seamless integration with your existing in-house storage solutions.

    It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

    One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

    Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

    Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

    In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

    Install Harvester Cluster

    Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistentStatePaths section in the Harvester configuration. These paths include:

    os:
      persistent_state_paths:
        - /var/lib/rook
        - /var/lib/ceph
      modules:
        - rbd
        - nbd

    After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.

    Install Rook to Harvester

    Install Rook to the Harvester cluster by referring to Rook Quickstart.

    curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
    && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
    # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
    kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
    kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m

    Using an existing external Ceph cluster

    1. Run the python script create-external-cluster-resources.py in the existing external Ceph cluster to create all users and keys.

       # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
       curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
       python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash

    2. Copy the Bash output.

    Example output:

    export NAMESPACE=rook-ceph-external
    export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
    export ROOK_EXTERNAL_USERNAME=client.healthchecker
    export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
    export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
    export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
    export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
    export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
    export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
    export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
    export MONITORING_ENDPOINT=192.168.5.99
    export MONITORING_ENDPOINT_PORT=9283
    export RBD_POOL_NAME=test
    export RGW_POOL_PREFIX=default
    3. Consume the external Ceph cluster resources on the Harvester cluster.
    # Paste the above output from create-external-cluster-resources.py into import-env.sh
    vim import-env.sh
    source import-env.sh
    # this script will create a StorageClass ceph-rbd
    source import-external-cluster.sh
    kubectl apply -f common-external.yaml
    kubectl apply -f cluster-external.yaml
    # wait for all pods to become Ready
    watch 'kubectl --namespace rook-ceph get pods'
    4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
    cat >./csi/rbd/snapshotclass-external.yaml <<EOF
    ---
    apiVersion: snapshot.storage.k8s.io/v1
    kind: VolumeSnapshotClass
    metadata:
      name: csi-rbdplugin-snapclass-external
    driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
    parameters:
      clusterID: rook-ceph-external # namespace:cluster
      csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
      csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
    deletionPolicy: Delete
    EOF

    kubectl apply -f ./csi/rbd/snapshotclass-external.yaml

    Configure Harvester Cluster

    Before you can make use of Harvester's Backup & Snapshot features, you need to set up some essential configurations through the Harvester csi-driver-config setting. To set up these configurations, follow these steps:

    1. Log in to the Harvester UI, then navigate to Advanced > Settings.
    2. Find and select csi-driver-config, and then select Edit Setting to access the configuration options.
    3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
    4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
    5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.

    csi-driver-config-external

    Use Rook Ceph in Harvester

    After successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named rook-ceph-block for the internal Ceph cluster or named ceph-rbd for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster's storage capabilities.
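
    For example, a minimal sketch of a PersistentVolumeClaim that requests a block volume from the external Ceph cluster through the ceph-rbd StorageClass (the claim name, namespace, and size are illustrative):

    cat <<EOF | kubectl apply -f -
    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: vm-data-ceph
      namespace: default
    spec:
      accessModes:
        - ReadWriteOnce      # adjust if your workload needs shared access
      volumeMode: Block
      resources:
        requests:
          storage: 10Gi
      storageClassName: ceph-rbd
    EOF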

    With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.

    rook-ceph-volume-external

    rook-ceph-vm-external

    · 3 min read
    Canwu Yao

    With the release of Harvester v1.2.0, the new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

    With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

    However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs.

    This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

    Prerequisites

    • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

    • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

    Steps to Keep Load Balancer IP

    1. Execute the following script before upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
      • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
      • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
      • <guest-cluster-name>: Name of your guest cluster.
      • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

      The script helps you copy the DHCP information to the service annotations and modify the IP pool allocation history to make sure the IPs remain unchanged.

      After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

      apiVersion: v1
      kind: Service
      metadata:
        annotations:
          kube-vip.io/hwaddr: 00:00:6c:4f:18:68
          kube-vip.io/requestedIP: 172.19.105.215
        name: lb0
        namespace: default

      For a load balancer service in pool mode, the IP pool allocation history is updated with the new load balancer name. For example:

      apiVersion: loadbalancer.harvesterhci.io/v1beta1
      kind: IPPool
      metadata:
        name: default
      spec:
        ...
      status:
        allocatedHistory:
          192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replaced with the new load balancer name
    2. Add a network selector for the pool.

      For example, the following cluster is under the VM network default/mgmt-untagged. The network selector should be default/mgmt-untagged.

    3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

    4. Execute the script after upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

      In this step, the script wraps the operations to upgrade the Harvester cloud provider to set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers will be created with the unchanged IPs.

    · 7 min read

    This article covers instructions for installing the NetApp Astra Trident CSI driver into a Harvester cluster, which enables NetApp storage systems to provide storage volumes usable by virtual machines running in Harvester.

    The NetApp storage will be an option in addition to the normal Longhorn storage; it will not replace Longhorn. Virtual machine images will still be stored using Longhorn.

    This has been tested with Harvester 1.2.0 and Trident v23.07.0.

    This procedure only works to access storage via iSCSI, not NFS.

    note

    Third-party storage classes (including those based on Trident) can only be used for non-boot volumes of Harvester VMs.

    Detailed Instructions

    We assume that before beginning this procedure, a Harvester cluster and a NetApp ONTAP storage system are both installed and configured for use.

    Most of these steps can be performed on any system with the helm and kubectl commands installed and network connectivity to the management port of the Harvester cluster. Let's call this your workstation. Certain steps must be performed on one or more cluster nodes themselves. The steps described below should be done on your workstation unless otherwise indicated.

    The last step (enabling multipathd) should be done on all nodes after the Trident CSI has been installed.

    Certain parameters of your installation will require modification of details in the examples in the procedure given below. Those which you may wish to modify include:

    • The namespace. trident is used as the namespace in the examples, but you may prefer to use another.
    • The name of the deployment. mytrident is used but you can change this to something else.
    • The management IP address of the ONTAP storage system
    • Login credentials (username and password) of the ONTAP storage system

    The procedure is as follows.

    1. Read the NetApp Astra Trident documentation:

      The simplest method is to install using Helm; that process is described here.

    2. Download the KubeConfig from the Harvester cluster.

      • Open the web UI for your Harvester cluster
      • In the lower left corner, click the "Support" link. This will take you to a "Harvester Support" page.
      • Click the button labeled "Download KubeConfig". This will download your cluster config to a file called "local.yaml" by default.
      • Move this file to a convenient location and set your KUBECONFIG environment variable to the path of this file.
    3. Prepare the cluster for installation of the Helm chart.

      Before starting installation of the Helm chart, special authorization must be provided to enable certain modifications to be made during the installation.

      · 3 min read
      Jian Wang

      Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

      You can view resource-related metrics on the Harvester UI.

      • Hosts screen: Displays host-level metrics

        host level resources metrics

      • Dashboard screen: Displays cluster-level metrics

        cluster level resources metrics

      CPU and Memory

      The following sections describe the data sources and calculation methods for CPU and memory resources.

      • Resource capacity: Baseline data
      • Resource usage: Data source for the Used field on the Hosts screen
      • Resource reservation: Data source for the Reserved field on the Hosts screen

      Resource Capacity

      In Kubernetes, a Node object is created for each host.

      The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

      # kubectl get nodes -A -oyaml
      apiVersion: v1
      items:
      - apiVersion: v1
      kind: Node
      metadata:
      ..
      management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
      management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
      node.alpha.kubernetes.io/ttl: "0"
      ..
      name: harv41
      resourceVersion: "2170215"
      uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
      spec:
      podCIDR: 10.52.0.0/24
      podCIDRs:
      - 10.52.0.0/24
      providerID: rke2://harv41
      status:
      addresses:
      - address: 192.168.122.141
      type: InternalIP
      - address: harv41
      type: Hostname
      allocatable:
      cpu: "10"
      devices.kubevirt.io/kvm: 1k
      devices.kubevirt.io/tun: 1k
      devices.kubevirt.io/vhost-net: 1k
      ephemeral-storage: "149527126718"
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 20464216Ki
      pods: "200"
      capacity:
      cpu: "10"
      devices.kubevirt.io/kvm: 1k
      devices.kubevirt.io/tun: 1k
      devices.kubevirt.io/vhost-net: 1k
      ephemeral-storage: 153707984Ki
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 20464216Ki
      pods: "200"

      Resource Usage

      CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

      # kubectl get NodeMetrics -A -oyaml
      apiVersion: v1
      items:
      - apiVersion: metrics.k8s.io/v1beta1
      kind: NodeMetrics
      metadata:
      ...
      name: harv41
      timestamp: "2024-01-23T12:04:44Z"
      usage:
      cpu: 891736742n
      memory: 9845008Ki
      window: 10.149s

      Resource Reservation

      Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the NodeMetrics object.

            management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
      management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

      For more information, see Requests and Limits in the Kubernetes documentation.

      Storage

      Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

      Reserved Storage in Longhorn

      Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

      Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

      note

      Before changing the settings, read the Longhorn documentation carefully.

      Data Sources and Calculation

      Harvester uses the following data to calculate metrics for storage resources.

      • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

      • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

      • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

      # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

      apiVersion: v1
      items:
      - apiVersion: longhorn.io/v1beta2
      kind: Node
      metadata:
      ..
      name: harv41
      namespace: longhorn-system
      ..
      spec:
      allowScheduling: true
      disks:
      default-disk-ef11a18c36b01132:
      allowScheduling: true
      diskType: filesystem
      evictionRequested: false
      path: /var/lib/harvester/defaultdisk
      storageReserved: 24220101427
      tags: []
      ..
      status:
      ..
      diskStatus:
      default-disk-ef11a18c36b01132:
      ..
      diskType: filesystem
      diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
      scheduledReplica:
      pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
      pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
      pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
      pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
      pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
      storageAvailable: 77699481600
      storageMaximum: 80733671424
      storageScheduled: 45097156608
      region: ""
      snapshotCheckStatus: {}
      zone: ""

      · 2 min read
      David Ko
      Jillian Maroket

      The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

      • SATA/NVMe SSDs or disk drives with similar performance
      • 10 Gbps network bandwidth between nodes
      • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

      The following sections outline other recommendations for achieving optimal disk performance.

      IO Performance

      • Storage network: Use a dedicated storage network to improve IO performance and stability.

      • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

      • Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage or less impact to system performance. This practice is especially beneficial to data-intensive applications.

      • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags.

      • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes.

        For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

        For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.

      Space Efficiency

      • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

        For applications with replication capability, periodically delete all types of snapshots.

      Disaster Recovery

      • Recurring backups: Create recurring backup jobs for mission-critical application volumes.

      • System backup: Run periodic system backups.

      · 11 min read
      Jian Wang

      In Harvester, the VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

      The VM Live Migration process is finished smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.

      This article dives into the VM Live Migration process in more detail. There are three main parts:

      • General Process of VM Live Migration
      • VM Live Migration Strategies
      • VM Live Migration Configurations

      Related issues:

      note

      A big part of the following contents are copied from kubevirt document https://kubevirt.io/user-guide/operations/live_migration/, some contents/formats are adjusted to fit in this document.

      General Process of VM Live Migration

      Starting a Migration from Harvester UI

      1. Go to the Virtual Machines page.
      2. Find the virtual machine that you want to migrate and select > Migrate.
      3. Choose the node to which you want to migrate the virtual machine and select Apply.

      After successfully selecting Apply, a CRD VirtualMachineInstanceMigration object is created, and the related controller/operator will start the process.

      Migration CRD Object

      You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

      The example below starts a migration process for a virtual machine instance (VMI) new-vm.

      apiVersion: kubevirt.io/v1
      kind: VirtualMachineInstanceMigration
      metadata:
      name: migration-job
      spec:
      vmiName: new-vm

      Under the hood, the open source projects Kubevirt, Libvirt, QEMU, ... perform most of the VM Live Migration. References.

      Migration Status Reporting

      When starting a virtual machine instance (VMI), it has also been calculated whether the machine is live migratable. The result is being stored in the VMI VMI.status.conditions. The calculation can be based on multiple parameters of the VMI, however, at the moment, the calculation is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.

      The reported Migration Method is also being calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

      Status:
      Conditions:
      Status: True
      Type: LiveMigratable
      Migration Method: BlockMigration

      Migration Status

      The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

      Below is an example of a successful migration.

      Migration State:
      Completed: true
      End Timestamp: 2019-03-29T03:37:52Z
      Migration Config:
      Completion Timeout Per GiB: 800
      Progress Timeout: 150
      Migration UID: c64d4898-51d3-11e9-b370-525500d15501
      Source Node: node02
      Start Timestamp: 2019-03-29T04:02:47Z
      Target Direct Migration Node Ports:
      35001: 0
      41068: 49152
      38284: 49153
      Target Node: node01
      Target Node Address: 10.128.0.46
      Target Node Domain Detected: true
      Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq

      VM Live Migration Strategies

      VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

      Understanding Different VM Live Migration Strategies

      VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the scenario, however, the migration might get stuck without an ability to progress.

      The main factor that affects migrations from the guest perspective is its dirty rate, which is the rate by which the VM dirties memory. Guests with high dirty rate lead to a race during migration. On the one hand, memory would be transferred continuously to the target, and on the other, the same memory would get dirty by the guest. On such scenarios, one could consider to use more advanced migration strategies. Refer to Understanding different migration strategies for more details.

      There are 3 VM Live Migration strategies/policies:

      VM Live Migration Strategy: Pre-copy

      Pre-copy is the default strategy. It should be used for most cases.

      The way it works is as following:

      1. The target VM is created, but the guest keeps running on the source VM.
      2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
      3. The guest starts executing on the target VM. 4. The source VM is being removed.

      Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

      However, on some cases migrations might not converge easily, that is, by the time the chunk of source VM state would be received by the target VM, it would already be mutated by the source VM (which is the VM the guest executes on). There are many reasons for migrations to fail converging, such as a high dirty-rate or low resources like network bandwidth and CPU. On such scenarios, see the following alternative strategies below.

      VM Live Migration Strategy: Post-copy

      The way post-copy migrations work is as following:

      1. The target VM is created.
      2. The guest is being run on the target VM.
      3. The source starts sending chunks of VM state (mostly memory) to the target.
      4. When the guest, running on the target VM, would access memory: 1. If the memory exists on the target VM, the guest can access it. 2. Otherwise, the target VM asks for a chunk of memory from the source VM.
      5. Once all of the memory state is updated at the target VM, the source VM is being removed.

      The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

      Advantages:

      • The same memory chink is never being transferred twice. This is possible due to the fact that with post-copy it doesn't matter that a page had been dirtied since the guest is already running on the target VM.
      • This means that a high dirty-rate has much less effect.
      • Consumes less network bandwidth.

      Disadvantages:

      • When using post-copy, the VM state has no one source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest's state, but some other parts of it may still be updated only at the source VM. This situation is generally dangerous, since, for example, if either the target or guest VMs crash the state cannot be recovered.
      • Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest would have to wait for a lot of memory in a short period of time.
      • Slower than pre-copy on most cases.
      • Harder to cancel a migration.

      VM Live Migration Strategy: Auto-converge

      Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

      Since a high dirty-rate is usually the most significant factor for migrations to not converge, auto-converge simply throttles the guest's CPU. If the migration would converge fast enough, the guest's CPU would not be throttled or throttled negligibly. But, if the migration would not converge fast enough, the CPU would be throttled more and more as time goes.

      This technique dramatically increases the probability of the migration converging eventually.

      Observe the VM Live Migration Progress and Result

      Migration Timeouts

      Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process non-locked pages and blocks are being copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than these are being copied.

      Completion Time

      In some cases the virtual machine can write to different memory pages / disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, live migration will be aborted if it is running for a long period of time. The timeout is calculated base on the size of the VMI, it's memory and the ephemeral disks that are needed to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s is the time for GiB of data to wait for the migration to be completed before aborting it. A VMI with 8Gib of memory will time out after 6400 seconds.

      Progress Timeout

      A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

      VM Live Migration Configurations

      Changing Cluster Wide Migration Limits

      KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, it is to only run 5 migrations in parallel with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.

      You can change these values in the kubevirt CR:

          apiVersion: kubevirt.io/v1
      kind: Kubevirt
      metadata:
      name: kubevirt
      namespace: kubevirt
      spec:
      configuration:
      migrations:
      parallelMigrationsPerCluster: 5
      parallelOutboundMigrationsPerNode: 2
      bandwidthPerMigration: 64Mi
      completionTimeoutPerGiB: 800
      progressTimeout: 150
      disableTLS: false
      nodeDrainTaintKey: "kubevirt.io/drain"
      allowAutoConverge: false ---------------------> related to: Auto-converge
      allowPostCopy: false -------------------------> related to: Post-copy
      unsafeMigrationOverride: false

      Remember that most of these configurations can be overridden and fine-tuned to a specified group of VMs. For more information, please refer to the Migration Policies section below.

      Migration Policies

      Migration policies provides a new way of applying migration configurations to Virtual Machines. The policies can refine Kubevirt CR's MigrationConfiguration that sets the cluster-wide migration configurations. This way, the cluster-wide settings default how the migration policy can be refined (i.e., changed, removed, or added).

      Remember that migration policies are in version v1alpha1. This means that this API is not fully stable yet and that APIs may change in the future.

      Migration Configurations

      Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

      apiVersion: migrations.kubevirt.io/v1alpha1
      kind: MigrationPolicy
      spec:
      allowAutoConverge: true
      bandwidthPerMigration: 217Ki
      completionTimeoutPerGiB: 23
      allowPostCopy: false

      All the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR's MigrationConfiguration. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configurations.

      Matching Policies to VMs

      Next in the spec are the selectors defining the group of VMs to apply the policy. The options to do so are the following.

      This policy applies to the VMs in namespaces that have all the required labels:

      apiVersion: migrations.kubevirt.io/v1alpha1
      kind: MigrationPolicy
      spec:
      selectors:
      namespaceSelector:
      hpc-workloads: true # Matches a key and a value

      The policy below applies to the VMs that have all the required labels:

      apiVersion: migrations.kubevirt.io/v1alpha1
      kind: MigrationPolicy
      spec:
      selectors:
      virtualMachineInstanceSelector:
      workload-type: db # Matches a key and a value

      References

      Documents

      Libvirt Guest Migration

      Libvirt has a chapter to describe the pricipal of VM/Guest Live Migration.

      https://libvirt.org/migration.html

      Kubevirt Live Migration

      https://kubevirt.io/user-guide/operations/live_migration/

      Source Code

      The VM Live Migration related configuration options are passed to each layer correspondingly.

      Kubevirt

      https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

      ...
      import "libvirt.org/go/libvirt"

      ...

      func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
      ...
      if options.AllowAutoConverge {
      migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
      }
      if options.AllowPostCopy {
      migrateFlags |= libvirt.MIGRATE_POSTCOPY
      }
      ...
      }

      Go Package Libvirt

      https://pkg.go.dev/libvirt.org/go/libvirt

      const (
      ...
      MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
      MIGRATE_RDMA_PIN_ALL = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
      MIGRATE_POSTCOPY = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
      ...
      )

      Libvirt

      https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

          /* Enable algorithms that ensure a live migration will eventually converge.
      * This usually means the domain will be slowed down to make sure it does
      * not change its memory faster than a hypervisor can transfer the changed
      * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
      * parameters can be used to tune the algorithm.
      *
      * Since: 1.2.3
      */
      VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
      ...
      /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
      * migration. However, the migration will start normally and
      * virDomainMigrateStartPostCopy needs to be called to switch it into the
      * post-copy mode. See virDomainMigrateStartPostCopy for more details.
      *
      * Since: 1.3.3
      */
      VIR_MIGRATE_POSTCOPY = (1 << 15),

      · 4 min read
      Hang Yu

      Starting with Harvester v1.2.0, it offers the capability to install a Container Storage Interface (CSI) in your Harvester cluster. This allows you to leverage external storage for the Virtual Machine's non-system data disk, giving you the flexibility to use different drivers tailored for specific needs, whether it's for performance optimization or seamless integration with your existing in-house storage solutions.

      It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

      One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

      Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

      Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

      In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

      Install Harvester Cluster

      Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistent_state_paths section of the Harvester configuration and specify the required kernel modules, as shown below:

      os:
        persistent_state_paths:
          - /var/lib/rook
          - /var/lib/ceph
        modules:
          - rbd
          - nbd

      After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.

      Install Rook to Harvester

      Install Rook to the Harvester cluster by referring to Rook Quickstart.

      curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
      && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
      # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
      kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
      kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m
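
      Before moving on, it can be worth confirming that the operator is up and its CRDs are registered; a quick check might look like the following (a sketch, not part of the original procedure):

      kubectl -n rook-ceph get pods -l app=rook-ceph-operator
      kubectl get crd cephclusters.ceph.rook.io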

      Using an existing external Ceph cluster

      1. Run the Python script create-external-cluster-resources.py on the existing external Ceph cluster to create all the required users and keys.
      # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
      curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
      python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash
      2. Copy the Bash output.

      Example output:

      export NAMESPACE=rook-ceph-external
      export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
      export ROOK_EXTERNAL_USERNAME=client.healthchecker
      export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
      export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
      export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
      export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
      export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
      export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
      export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
      export MONITORING_ENDPOINT=192.168.5.99
      export MONITORING_ENDPOINT_PORT=9283
      export RBD_POOL_NAME=test
      export RGW_POOL_PREFIX=default
      3. Consume the external Ceph cluster resources on the Harvester cluster.
      # Paste the above output from create-external-cluster-resources.py into import-env.sh
      vim import-env.sh
      source import-env.sh
      # this script will create a StorageClass ceph-rbd
      source import-external-cluster.sh
      kubectl apply -f common-external.yaml
      kubectl apply -f cluster-external.yaml
      # wait for all pods to become Ready
      watch 'kubectl --namespace rook-ceph get pods'
      4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
      cat >./csi/rbd/snapshotclass-external.yaml <<EOF
      ---
      apiVersion: snapshot.storage.k8s.io/v1
      kind: VolumeSnapshotClass
      metadata:
        name: csi-rbdplugin-snapclass-external
      driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
      parameters:
        clusterID: rook-ceph-external # namespace:cluster
        csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
        csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
      deletionPolicy: Delete
      EOF

      kubectl apply -f ./csi/rbd/snapshotclass-external.yaml
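
      Before configuring Harvester, it may help to confirm that the resources created above exist. The names below follow the defaults produced by the Rook scripts and manifests used in the previous steps (a sketch only):

      # StorageClass created by import-external-cluster.sh
      kubectl get storageclass ceph-rbd
      # VolumeSnapshotClass created in the previous step
      kubectl get volumesnapshotclass csi-rbdplugin-snapclass-external
      # The external CephCluster object should eventually report that it is connected
      kubectl get cephcluster -A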

      Configure Harvester Cluster

      Before you can make use of Harvester's Backup & Snapshot features, you need to set up some essential configurations through the Harvester csi-driver-config setting. To set up these configurations, follow these steps:

      1. Login to the Harvester UI, then navigate to Advanced > Settings.
      2. Find and select csi-driver-config, and then select ⋮ > Edit Setting to access the configuration options.
      3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
      4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
      5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.

      csi-driver-config-external
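
      For reference, the same configuration ends up stored as JSON in the csi-driver-config setting. The snippet below is only a sketch for inspecting it from the command line; treat the exact JSON layout shown in the comment as an assumption and verify it against your own cluster:

      # Print the current value of the csi-driver-config setting
      kubectl get settings.harvesterhci.io csi-driver-config -o jsonpath='{.value}'
      # Expected to contain an entry keyed by the provisioner name, roughly:
      # {"rook-ceph.rbd.csi.ceph.com":{"volumeSnapshotClassName":"csi-rbdplugin-snapclass-external","backupVolumeSnapshotClassName":"csi-rbdplugin-snapclass-external"}}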

      Use Rook Ceph in Harvester

      After successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named rook-ceph-block for the internal Ceph cluster or named ceph-rbd for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster's storage capabilities.

      With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.

      rook-ceph-volume-external

      rook-ceph-vm-external

      · 3 min read
      Canwu Yao

      As Harvester v1.2.0 is released, a new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

      With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

      image

      However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs.

      This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

      Prerequisites

      • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

      • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

      Steps to Keep Load Balancer IP

      1. Execute the following script before upgrading.

        curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
        • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
        • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
        • <guest-cluster-name>: Name of your guest cluster.
        • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

        The script helps users copy the DHCP information into the service annotations and modifies the IP pool's allocated history to make sure the IPs remain unchanged.

        image

        After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

        apiVersion: v1
        kind: Service
        metadata:
          annotations:
            kube-vip.io/hwaddr: 00:00:6c:4f:18:68
            kube-vip.io/requestedIP: 172.19.105.215
          name: lb0
          namespace: default

        For a load balancer service in pool mode, the allocated history in the IP pool will be updated to record the new load balancer name. For example:

        apiVersion: loadbalancer.harvesterhci.io/v1beta1
        kind: IPPool
        metadata:
          name: default
        spec:
          ...
        status:
          allocatedHistory:
            192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replaced with the new load balancer name
      2. Add network selector for the pool.

        For example, the following cluster is under the VM network default/mgmt-untagged. The network selector should be default/mgmt-untagged.

        image

        image

      3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

        image

      4. Execute the script after upgrading.

        curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

        image

        In this step, the script wraps the operations to upgrade the Harvester cloud provider to set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers will be created with the unchanged IPs.
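
        To confirm that the load balancer IPs were indeed preserved, a quick before-and-after comparison of the guest cluster's LoadBalancer services can help. This is only a sketch and reuses the kubeconfig placeholder from the steps above:

        kubectl --kubeconfig <guest-cluster-kubeconfig-path> get svc -A | grep LoadBalancer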

      · 7 min read

      This article covers instructions for installing the NetApp Astra Trident CSI driver in a Harvester cluster, which enables NetApp storage systems to provide storage volumes usable by virtual machines running in Harvester.

      The NetApp storage will be an option in addition to the normal Longhorn storage; it will not replace Longhorn. Virtual machine images will still be stored using Longhorn.

      This has been tested with Harvester 1.2.0 and Trident v23.07.0.

      This procedure only works to access storage via iSCSI, not NFS.

      note

      Third-party storage classes (including those based on Trident) can only be used for non-boot volumes of Harvester VMs.

      Detailed Instructions

      We assume that before beginning this procedure, a Harvester cluster and a NetApp ONTAP storage system are both installed and configured for use.

      Most of these steps can be performed on any system with the helm and kubectl commands installed and network connectivity to the management port of the Harvester cluster. Let's call this your workstation. Certain steps must be performed on one or more cluster nodes themselves. The steps described below should be done on your workstation unless otherwise indicated.

      The last step (enabling multipathd) should be done on all nodes after the Trident CSI has been installed.

      Certain details in the examples of the procedure below must be adapted to your installation. Parameters you may wish to modify include:

      • The namespace. trident is used as the namespace in the examples, but you may prefer to use another.
      • The name of the deployment. mytrident is used but you can change this to something else.
      • The management IP address of the ONTAP storage system
      • Login credentials (username and password) of the ONTAP storage system

      The procedure is as follows.

      1. Read the NetApp Astra Trident documentation:

        The simplest method is to install using Helm; that process is described here.

      2. Download the KubeConfig from the Harvester cluster.

        • Open the web UI for your Harvester cluster
        • In the lower left corner, click the "Support" link. This will take you to a "Harvester Support" page.
        • Click the button labeled "Download KubeConfig". This will download your cluster config to a file called "local.yaml" by default.
        • Move this file to a convenient location and set your KUBECONFIG environment variable to the path of this file.
      3. Prepare the cluster for installation of the Helm chart.

        Before starting installation of the helm chart, special authorization must be provided to enable certain modifications to be made during the installation. This addresses the issue described here: https://github.com/NetApp/trident/issues/839

        • Put the following text into a file. For this example we'll call it authorize_trident.yaml.

          ---
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRole
          metadata:
            name: trident-operator-psa
          rules:
          - apiGroups:
            - management.cattle.io
            resources:
            - projects
            verbs:
            - updatepsa
          ---
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRoleBinding
          metadata:
            name: trident-operator-psa
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: ClusterRole
            name: trident-operator-psa
          subjects:
          - kind: ServiceAccount
            name: trident-operator
            namespace: trident
        • Apply this manifest via the command kubectl apply -f authorize_trident.yaml.

      4. Install the helm chart.

        • First you will need to add the Astra Trident Helm repository:

          helm repo add netapp-trident https://netapp.github.io/trident-helm-chart
        • Next, install the Helm chart. This example uses mytrident as the deployment name, trident as the namespace, and 23.07.0 as the version number to install:

          helm install mytrident netapp-trident/trident-operator --version 23.07.0 --create-namespace --namespace trident
        • The NetApp documentation describes variations on how you can do this.

      5. Download and extract the tridentctl command, which will be needed for the next few steps.

        This and the next few steps need to be performed logged into a master node of the Harvester cluster, using root access.

        cd /tmp
        curl -L -o trident-installer-23.07.0.tar.gz https://github.com/NetApp/trident/releases/download/v23.07.0/trident-installer-23.07.0.tar.gz
        tar -xf trident-installer-23.07.0.tar.gz
        cd trident-installer
      6. Install a backend.

        This part is specific to Harvester.

        1. Put the following into a text file, for example /tmp/backend.yaml

          version: 1
          backendName: default_backend_san
          storageDriverName: ontap-san-economy
          managementLIF: 172.19.97.114
          svm: default_backend
          username: admin
          password: password1234
          labels:
            name: default_backend_san

          The LIF IP address, username, and password in this file should be replaced with the management LIF and credentials of your ONTAP system.

        2. Create the backend

          ./tridentctl create backend -f /tmp/backend.yaml -n trident
        3. Check that it is created

          ./tridentctl get backend -n trident
      7. Define a StorageClass and SnapshotClass.

        1. Put the following into a file, for example /tmp/storage.yaml

          ---
          apiVersion: storage.k8s.io/v1
          kind: StorageClass
          metadata:
            name: ontap-san-economy
          provisioner: csi.trident.netapp.io
          parameters:
            selector: "name=default_backend_san"
          ---
          apiVersion: snapshot.storage.k8s.io/v1
          kind: VolumeSnapshotClass
          metadata:
            name: csi-snapclass
          driver: csi.trident.netapp.io
          deletionPolicy: Delete
        2. Apply the definitions:

          kubectl apply -f /tmp/storage.yaml
      8. Configure multipathd

        The following configuration is required before enabling multipathd. The recommended method is to "whitelist" the Trident devices using device properties rather than device naming. The properties to allow are the device vendor and product. Here is an example of what you'll want in /etc/multipath.conf:

        blacklist {
            device {
                vendor "!NETAPP"
                product "!LUN"
            }
        }
        blacklist_exceptions {
            device {
                vendor "NETAPP"
                product "LUN"
            }
        }

        This example only works if NetApp is the only storage provider in the system for which multipathd must be used. More complex environments will require more complex configuration.

        Explicitly putting that content into /etc/multipath.conf will work when you start multipathd as described below, but the change in /etc will not persist across node reboots. To solve that problem, you should add another file to /oem that will re-generate /etc/multipath.conf when the node reboots. The following example will create the /etc/multipath.conf given in the example above, but may need to be modified for your environment if you have a more complex iSCSI configuration:

        stages:
          initramfs:
            - name: "Configure multipath blacklist and whitelist"
              files:
                - path: /etc/multipath.conf
                  permissions: 0644
                  owner: 0
                  group: 0
                  content: |
                    blacklist {
                        device {
                            vendor "!NETAPP"
                            product "!LUN"
                        }
                    }
                    blacklist_exceptions {
                        device {
                            vendor "NETAPP"
                            product "LUN"
                        }
                    }

        Remember, this has to be done on every node.

      9. Enable multipathd.

        Adding the above files to /oem will take effect on the next reboot of the node; multipathd can be enabled immediately without rebooting the node using the following commands:

        systemctl enable multipathd
        systemctl start multipathd

        After the above steps, the ontap-san-economy storage class should be available when creating a volume for a Harvester VM.
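
    As a final check, you can verify that the Trident pods are running and that a test PVC binds using the new storage class. This is only a sketch; the PVC name and size are arbitrary examples:

    kubectl -n trident get pods
    kubectl get storageclass ontap-san-economy
    cat > /tmp/test-pvc.yaml <<EOF
    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: trident-test-pvc # example name
    spec:
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          storage: 1Gi
      storageClassName: ontap-san-economy
    EOF
    kubectl apply -f /tmp/test-pvc.yaml
    kubectl get pvc trident-test-pvc
    # Clean up the test claim afterwards
    kubectl delete -f /tmp/test-pvc.yaml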

    · 7 min read
    Kiefer Chang

    Harvester v1.2.0 introduces a new enhancement where Longhorn system-managed components in newly-deployed clusters are automatically assigned a system-cluster-critical priority class by default. However, when upgrading your Harvester clusters from previous versions, you may notice that Longhorn system-managed components do not have any priority class set.

    This behavior is intentional and aimed at supporting zero-downtime upgrades. Longhorn does not allow changing the priority-class setting when attached volumes exist. For more details, please refer to Setting Priority Class During Longhorn Installation.

    This article explains how to manually configure priority classes for Longhorn system-managed components after upgrading your Harvester cluster, ensuring that your Longhorn components have the appropriate priority class assigned and maintaining the stability and performance of your system.

    Stop all virtual machines

    Stop all virtual machines (VMs) to detach all volumes. Please back up any work before doing this.

    1. Login to a Harvester controller node and become root.

    2. Get all running VMs and write down their namespaces and names:

      kubectl get vmi -A

      Alternatively, you can get this information by backing up the Virtual Machine Instance (VMI) manifests with the following command:

      kubectl get vmi -A -o json > vmi-backup.json
    3. Shut down all VMs. Log in to all running VMs and shut them down gracefully (recommended). Or use the following command to send shutdown signals to all VMs:

      kubectl get vmi -A -o json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Stop ${namespace}/${name}"
        virtctl stop $name -n $namespace
      done
      note

      You can also stop all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select ⋮ > Stop.
    4. Ensure there are no running VMs:

      Run the command:

      kubectl get vmi -A

      The above command must return:

      No resources found

    Scale down monitoring pods

    1. Scale down the Prometheus deployment. Run the following command and wait for all Prometheus pods to terminate:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like this:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      statefulset rolling update complete 0 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to terminate:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      statefulset rolling update complete 0 pods at revision alertmanager-rancher-monitoring-alertmanager-c8c459dff...
    3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to terminate:

      kubectl scale --replicas=0 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      deployment "rancher-monitoring-grafana" successfully rolled out

    Scale down vm-import-controller pods

    1. Check if the vm-import-controller addon is enabled and configured with a persistent volume with the following command:

      kubectl get pvc -n harvester-system harvester-vm-import-controller

      If the above command returns an output like this, you must scale down the vm-import-controller pod. Otherwise, you can skip the following step.

      NAME                             STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS         AGE
      harvester-vm-import-controller   Bound    pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559   200Gi      RWO            harvester-longhorn   2m53s
    2. Scale down the vm-import-controller pods with the following command:

      kubectl scale --replicas=0 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      deployment "harvester-vm-import-controller" successfully rolled out

    Set the priority-class setting

    1. Before applying the priority-class setting, you need to verify all volumes are detached. Run the following command to verify the STATE of each volume is detached:

      kubectl get volumes.longhorn.io -A

      Verify the output looks like this:

      NAMESPACE         NAME                                       STATE      ROBUSTNESS   SCHEDULED   SIZE           NODE   AGE
      longhorn-system   pvc-5743fd02-17a3-4403-b0d3-0e9b401cceed   detached   unknown                  5368709120            15d
      longhorn-system   pvc-7e389fe8-984c-4049-9ba8-5b797cb17278   detached   unknown                  53687091200           15d
      longhorn-system   pvc-8df64e54-ecdb-4d4e-8bab-28d81e316b8b   detached   unknown                  2147483648            15d
      longhorn-system   pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559   detached   unknown                  214748364800          11m
    2. Set the priority-class setting with the following command:

      kubectl patch -n longhorn-system settings.longhorn.io priority-class --patch '{"value": "system-cluster-critical"}' --type merge

      Longhorn system-managed pods will restart and then you need to check if all the system-managed components have a priority class set:

      Get the value of the priority class system-cluster-critical:

      kubectl get priorityclass system-cluster-critical

      Verify the output looks like this:

      NAME                      VALUE        GLOBAL-DEFAULT   AGE
      system-cluster-critical   2000000000   false            15d
    3. Use the following command to get pods' priority in the longhorn-system namespace:

      kubectl get pods -n longhorn-system -o custom-columns="Name":metadata.name,"Priority":.spec.priority
    4. Verify all system-managed components' pods have the correct priority. System-managed components include:

      • csi-attacher
      • csi-provisioner
      • csi-resizer
      • csi-snapshotter
      • engine-image-ei
      • instance-manager-e
      • instance-manager-r
      • longhorn-csi-plugin
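
      To quickly spot any system-managed pod that is still missing the expected priority class, a filter like the following may help (a sketch; it simply prints pods whose priorityClassName is not system-cluster-critical):

      kubectl get pods -n longhorn-system -o custom-columns="Name":metadata.name,"PriorityClass":.spec.priorityClassName | grep -v system-cluster-critical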

    Scale up vm-import-controller pods

    If you scaled down the vm-import-controller pods, you must scale them up again.

    1. Scale up the vm-import-controller pod. Run the command:

      kubectl scale --replicas=1 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      Waiting for deployment "harvester-vm-import-controller" rollout to finish: 0 of 1 updated replicas are available...
      deployment "harvester-vm-import-controller" successfully rolled out
    2. Verify vm-import-controller is running using the following command:

      kubectl get pods --selector app.kubernetes.io/instance=vm-import-controller -A

      A sample output looks like this; the pod's STATUS must be Running:

      NAMESPACE          NAME                                               READY   STATUS    RESTARTS   AGE
      harvester-system   harvester-vm-import-controller-6bd8f44f55-m9k86    1/1     Running   0          4m53s

    Scale up monitoring pods

    1. Scale up the Prometheus deployment. Run the following command and wait for all Prometheus pods to roll out:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale up the AlertManager deployment. Run the following command and wait for all AlertManager pods to roll out:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision alertmanager-rancher-monitoring-alertmanager-c8bd4466c...
    3. Scale up the Grafana deployment. Run the following command and wait for all Grafana pods to roll out:

      kubectl scale --replicas=1 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      Waiting for deployment "rancher-monitoring-grafana" rollout to finish: 0 of 1 updated replicas are available...
      deployment "rancher-monitoring-grafana" successfully rolled out

    Start virtual machines

    1. Start a VM with the command:

      virtctl start $name -n $namespace

      Replace $name with the VM's name and $namespace with the VM's namespace. You can list all virtual machines with the command:

      kubectl get vms -A
      note

      You can also start all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select ⋮ > Start.

      Alternatively, you can start all previously running VMs from the backup manifest with the following command:

      cat vmi-backup.json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Start ${namespace}/${name}"
        virtctl start $name -n $namespace || true
      done

    · 2 min read
    Vicente Cheng

    Harvester OS is designed as an immutable operating system, which means you cannot directly install additional packages on it. While there is a way to install packages, it is strongly advised against doing so, as it may lead to system instability.

    If you only need to debug the system, the preferred way is to package a toolbox image with all the needed packages.

    This article describes how to package your own toolbox image and install any packages on it that help you debug the system.

    For example, if you want to analyze a storage performance issue, you can install blktrace on the toolbox image.

    Create a Dockerfile

    FROM opensuse/leap:15.4

    # Install blktrace
    RUN zypper in -y \
    blktrace

    RUN zypper clean --all

    Build the image and push

    # assume you are in the directory of Dockerfile
    $ docker build -t harvester/toolbox:dev .
    .
    .
    .
    naming to docker.io/harvester/toolbox:dev ...
    $ docker push harvester/toolbox:dev
    .
    .
    d4b76d0683d4: Pushed
    a605baa225e2: Pushed
    9e9058bdf63c: Layer already exists

    After you build and push the image, you can run the toolbox using this image to trace storage performance.

    Run the toolbox

    # Use the `privileged` flag only when needed. blktrace needs debugfs, so an extra mount point is added.
    docker run -it --privileged -v /sys/kernel/debug/:/sys/kernel/debug/ --rm harvester/toolbox:dev bash

    # test blktrace
    6ffa8eda3aaf:/ $ blktrace -d /dev/nvme0n1 -o - | blkparse -i -
    259,0 10 3414 0.020814875 34084 Q WS 2414127984 + 8 [fio]
    259,0 10 3415 0.020815190 34084 G WS 2414127984 + 8 [fio]
    259,0 10 3416 0.020815989 34084 C WS 3206896544 + 8 [0]
    259,0 10 3417 0.020816652 34084 C WS 2140319184 + 8 [0]
    259,0 10 3418 0.020817992 34084 P N [fio]
    259,0 10 3419 0.020818227 34084 U N [fio] 1
    259,0 10 3420 0.020818437 34084 D WS 2414127984 + 8 [fio]
    259,0 10 3421 0.020821826 34084 Q WS 1743934904 + 8 [fio]
    259,0 10 3422 0.020822150 34084 G WS 1743934904 + 8 [fio]

    · 4 min read
    Vicente Cheng

    In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: Analysis: Potential Data/Filesystem Corruption). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM's filesystem and repair it if needed.

    Stop The VM And Backup Volume

    Before you scan the filesystem, it is recommended that you back up the volume first. Refer to the following example steps to stop the VM and back up the volume.

    • Find the target VM.

    finding the target VM

    • Stop the target VM.

    Stop the target VM

    The target VM is stopped and the related volumes are detached. Now go to the Longhorn UI to back up this volume.

    • Enable Developer Tools & Features (Preferences -> Enable Developer Tools & Features).

    Preferences then enable developer mode Enable the developer mode

    • Click the ⋮ button and select Edit Config to edit the config page of the VM.

    goto edit config page of VM

    • Go to the Volumes tab and select Check volume details.

    link to longhorn volume page

    • Click the dropdown menu on the right side and select 'Attach' to attach the volume again.

    attach this volume again

    • Select the attached node.

    choose the attached node

    • Check the volume attached under Volume Details and select Take Snapshot on this volume page.

    take snapshot on volume page

    • Confirm that the snapshot is ready.

    check the snapshot is ready

    Now that you have completed the volume backup, you need to scan and repair the root filesystem.

    Scanning the root filesystem and repairing

    This section describes how to scan and repair the filesystem (e.g., XFS, EXT4) using the related tools.

    Before scanning, you need to know the filesystem's device/partition.

    • Identify the filesystem's device by checking the major and minor numbers of that device.
    1. Obtain the major and minor numbers from the listed volume information.

      In the following example, the volume name is pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

      harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al
      brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58

      The output indicates that the major and minor numbers are 8:0.

    2. Obtain the device name from the output of the lsblk command.

      harvester-node-0:~ # lsblk
      NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
      loop0 7:0 0 3G 1 loop /
      sda 8:0 0 40G 0 disk
      ├─sda1 8:1 0 2M 0 part
      ├─sda2 8:2 0 20M 0 part
      └─sda3 8:3 0 40G 0 part

      The output indicates that 8:0 are the major and minor numbers of the device named sda. Therefore, /dev/sda is related to the volume named pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

    • You should now know the filesystem's partition. In the example below, sda3 is the filesystem's partition.
    • Use the Filesystem toolbox image to scan and repair.
    # docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash

    Then scan the target device as described below.

    XFS

    When scanning an XFS filesystem, use the xfs_repair command and specify the problematic partition of the device.

    In the following example, /dev/sda3 is the problematic partition.

    # xfs_repair -n /dev/sda3

    To repair the corrupted partition, run the following command.

    # xfs_repair /dev/sda3

    EXT4

    When scanning an EXT4 filesystem, use the e2fsck command as follows, where /dev/sde1 is the problematic partition of the device.

    # e2fsck -f /dev/sde1

    To repair the corrupted partition, run the following command.

    # e2fsck -fp /dev/sde1

    After using the 'e2fsck' command, you should also see logs related to scanning and repairing the partition. Scanning and repairing the corrupted partition is successful if there are no errors in these logs.

    Detach and Start VM again.

    After the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.

    • Detach the volume from the Longhorn UI.

    detach volume on longhorn UI

    • Start the related VM again from the Harvester UI.

    Start VM again

    Your VM should now work normally.

    · 2 min read
    Kiefer Chang

    Harvester replicates volume data across disks in a cluster. Before removing a disk, the user needs to evict replicas on the disk to other disks to preserve the volumes' configured availability. For more information about eviction in Longhorn, please check Evicting Replicas on Disabled Disks or Nodes.

    Preparation

    This document describes how to evict Longhorn disks using the kubectl command. Before that, users must ensure the environment is set up correctly. There are two recommended ways to do this:

    1. Log in to any management node and switch to root (sudo -i).
    2. Download the Kubeconfig file and use it locally:
      • Install the kubectl and yq programs manually.
      • Open the Harvester GUI, click Support at the bottom left of the page, and click Download KubeConfig to download the Kubeconfig file.
      • Set the KUBECONFIG environment variable to the Kubeconfig file's path. For example, export KUBECONFIG=/path/to/kubeconfig.
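
    A quick way to confirm the local setup works before proceeding (a sketch; the kubeconfig path is a placeholder):

    export KUBECONFIG=/path/to/harvester-kubeconfig.yaml
    kubectl get nodes   # confirm access to the Harvester cluster
    yq --version        # confirm yq is available for the commands below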

    Evicting replicas from a disk

    1. List Longhorn nodes (names are identical to Kubernetes nodes):

      kubectl get -n longhorn-system nodes.longhorn.io

      Sample output:

      NAME    READY   ALLOWSCHEDULING   SCHEDULABLE   AGE
      node1   True    true              True          24d
      node2   True    true              True          24d
      node3   True    true              True          24d
    2. List disks on a node. Assume we want to evict replicas of a disk on node1:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.spec.disks'

      Sample output:

      default-disk-ed7af10f5b8356be:
        allowScheduling: true
        evictionRequested: false
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    3. Assume disk default-disk-ed7af10f5b8356be is the target we want to evict replicas out of.

      Edit the node:

      kubectl edit -n longhorn-system nodes.longhorn.io node1 

      Update these two fields and save:

      • spec.disks.<disk_name>.allowScheduling to false
      • spec.disks.<disk_name>.evictionRequested to true

      Sample editing:

      default-disk-ed7af10f5b8356be:
        allowScheduling: false
        evictionRequested: true
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    4. Wait for all replicas on the disk to be evicted.

      Get current scheduled replicas on the disk:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica'

      Sample output:

      pvc-86d3d212-d674-4c64-b69b-4a2eb1df2272-r-7b422db7: 5368709120
      pvc-b06f0b09-f30c-4936-8a2a-425b993dd6cb-r-bb0fa6b3: 2147483648
      pvc-b844bcc6-3b06-4367-a136-3909251cb560-r-08d1ab3c: 53687091200
      pvc-ea6e0dff-f446-4a38-916a-b3bea522f51c-r-193ca5c6: 10737418240

      Run the command repeatedly, and the output should eventually become an empty map:

      {}

      This means Longhorn has evicted all replicas on the disk to other disks.

      note

      If a replica remains on the disk, open the Longhorn GUI and check whether there is enough free space on other disks.


    Multiple NICs VM Connectivity

    · 4 min read
    Date Huang

    What is the default behavior of a VM with multiple NICs

    In some scenarios, you'll set up two or more NICs in your VM to serve different networking purposes. If all networks are configured with DHCP by default, you might get random connectivity issues. And while the issue might appear to be fixed after rebooting the VM, the connection will still be lost randomly after some period.

    How-to identify connectivity issues

    In a Linux VM, you can use commands from the iproute2 package to identify the default route.

    In your VM, execute the following command:

    ip route show default
    tip

    If you get an access denied error, run the command using sudo.

    The output of this command will only show the default route with the gateway and VM IP of the primary network interface (eth0 in the example below).

    default via <Gateway IP> dev eth0 proto dhcp src <VM IP> metric 100

    Here is the full example:

    $ ip route show default
    default via 192.168.0.254 dev eth0 proto dhcp src 192.168.0.100 metric 100

    However, if the issue covered in this KB occurs, you'll only be able to connect to the VM via the VNC or serial console.

    Once connected, run the same command as before:

    $ ip route show default

    However, this time you'll get a default route with an incorrect gateway IP. For example:

    default via <Incorrect Gateway IP> dev eth0 proto dhcp src <VM's IP> metric 100

    Why do connectivity issues occur randomly

    In a standard setup, cloud-based VMs typically use DHCP to configure their NICs. DHCP sets an IP address and a gateway for each NIC, and a default route to the gateway IP is also added, so you can use the VM's IP to connect to it.

    However, Linux distributions start multiple DHCP clients at the same time and do not have a priority system. This means that if you have two or more NICs configured with DHCP, the clients enter a race condition to configure the default route, and depending on the DHCP script of the running Linux distribution, there is no guarantee which default route will end up configured.

    As the default route might change on every DHCP renewal or after every OS reboot, this creates network connectivity issues.

    How to avoid the random connectivity issues

    You can easily avoid these connectivity issues by having only one NIC attached to the VM and having only one IP and one gateway configured.

    However, for VMs in more complex infrastructures, it is often not possible to use just one NIC. For example, your infrastructure might have both a storage network and a service network. For security reasons, the storage network is isolated from the service network and uses a separate subnet. In this case, you must have two NICs to connect to both the service and storage networks.

    You can choose a solution below that meets your requirements and security policy.

    Disable DHCP on secondary NIC

    As mentioned above, the problem is caused by a race condition between two DHCP clients. One solution to avoid this problem is to disable DHCP for all NICs and configure them with static IPs only. Alternatively, you can configure the secondary NIC with a static IP and keep the primary NIC on DHCP.

    1. To configure the primary NIC with a static IP (eth0 in this example), you can edit the file /etc/sysconfig/network/ifcfg-eth0 with the following values:
    BOOTPROTO='static'
    IPADDR='192.168.0.100'
    NETMASK='255.255.255.0'

    Alternatively, if you want to keep the primary NIC on DHCP (eth0 in this example), use the following values instead:

    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
    2. You need to configure the default route by editing the file /etc/sysconfig/network/ifroute-eth0 (if you configured the primary NIC using DHCP, skip this step):
    # Destination   Dummy/Gateway   Netmask   Interface
    default         192.168.0.254   -         eth0
    warning

    Do not add another default route for your secondary NIC.

    3. Finally, configure a static IP for the secondary NIC by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='static'
    IPADDR='10.0.0.100'
    NETMASK='255.255.255.0'

    Cloud-Init config

    network:
      version: 1
      config:
        - type: physical
          name: eth0
          subnets:
            - type: dhcp
        - type: physical
          name: eth1
          subnets:
            - type: static
              address: 10.0.0.100/24

    Disable secondary NIC default route from DHCP

    If your secondary NIC needs to get its IP from DHCP, you'll need to disable its default route configuration.

    1. Confirm that the primary NIC configures its default route in the file /etc/sysconfig/network/ifcfg-eth0:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
    2. Disable the secondary NIC default route configuration by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='no'
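
    After editing the ifcfg files in either solution, the new settings can be applied without a full reboot on wicked-based SUSE images; the following is a sketch under that assumption:

    # Re-read the updated ifcfg files (wicked-based images such as openSUSE/SLE)
    sudo wicked ifreload eth0 eth1
    # Verify that only one default route remains, via the primary NIC
    ip route show default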

    Cloud-Init config

    This solution is not available with Cloud-Init, which does not provide an option to disable the default route for DHCP-configured interfaces.


    NIC Naming Scheme

    · 2 min read
    Date Huang

    NIC Naming Scheme changed after upgrading to v1.0.1

    systemd in openSUSE Leap 15.3, which is the base OS of Harvester, was upgraded to 246.16-150300.7.39.1. This version enables the additional naming scheme sle15-sp3, which is v238 with bridge_no_slot. When a PCI bridge is associated with a NIC, systemd will no longer generate ID_NET_NAME_SLOT, and the naming policy in /usr/lib/systemd/network/99-default.link will fall back to ID_NET_NAME_PATH. Because of this change, NIC names might change on your Harvester nodes during the upgrade from v1.0.0 to v1.0.1-rc1 or above, which will cause network issues associated with NIC names.

    Affected Settings and Workaround

    Startup Network Configuration

    If NIC names change, you will need to update the names in /oem/99_custom.yaml. You can use the migration script to change the NIC names that are associated with a PCI bridge.

    tip

    You can use an identical machine to test the naming changes before applying the configuration to production machines.

    You can simply execute the script with the root account on v1.0.0 via

    # python3 udev_v238_sle15-sp3.py

    It will output the patched configuration to the screen, and you can compare it to the original one to ensure there are no exceptions (for example, use vimdiff to check the configuration).

    # python3 udev_v238_sle15-sp3.py > /oem/test
    # vimdiff /oem/test /oem/99_custom.yaml

    After checking the result, you can execute the script with --really-want-to-do to override the configuration. It will also back up the original configuration file with a timestamp before patching it.

    # python3 udev_v238_sle15-sp3.py --really-want-to-do
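
    If you want to double-check what actually changed, you can compare the timestamped backup created by the script with the patched file; the backup file name below is only an assumption based on the timestamp suffix mentioned above:

    # ls -al /oem/99_custom.yaml*
    # vimdiff /oem/99_custom.yaml.<backup timestamp> /oem/99_custom.yaml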

    Harvester VLAN Network Configuration

    If your VLAN network is associated with a NIC name directly, without bonding, you will need to migrate the ClusterNetwork and NodeNetwork resources together with the steps in the previous section.

    note

    If your VLAN network is associated with the bonding name in /oem/99_custom.yaml, you can skip this section.

    Modify ClusterNetworks

    You need to modify ClusterNetworks via

    $ kubectl edit clusternetworks vlan

    Search for this pattern:

    config:
      defaultPhysicalNIC: <Your NIC name>

    and change it to the new NIC name.
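
    Equivalently, a non-interactive sketch using kubectl patch; this assumes defaultPhysicalNIC sits under a top-level config field exactly as in the pattern above, so verify against your resource before using it:

    $ kubectl patch clusternetworks vlan --type merge -p '{"config":{"defaultPhysicalNIC":"<New NIC name>"}}'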

    Modify NodeNetworks

    You need to modify NodeNetworks via

    $ kubectl edit nodenetworks <Node name>-vlan

    Search for this pattern:

    spec:
      nic: <Your NIC name>

    and change it to the new NIC name.

    - + \ No newline at end of file diff --git a/kb/package_your_own_toolbox_image/index.html b/kb/package_your_own_toolbox_image/index.html index cbeaa319..b335b28e 100644 --- a/kb/package_your_own_toolbox_image/index.html +++ b/kb/package_your_own_toolbox_image/index.html @@ -9,13 +9,13 @@ Package your own Toolbox Image | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Package your own Toolbox Image

    · 2 min read
    Vicente Cheng

    Harvester OS is designed as an immutable operating system, which means you cannot directly install additional packages on it. While there is a way to install packages, it is strongly advised against doing so, as it may lead to system instability.

    If you only want to debug with the system, the preferred way is to package the toolbox image with all the needed packages.

    This article shares how to package your toolbox image and how to install any packages on the toolbox image that help you debug the system.

    For example, if you want to analyze a storage performance issue, you can install blktrace on the toolbox image.

    Create a Dockerfile

    FROM opensuse/leap:15.4

    # Install blktrace
    RUN zypper in -y \
    blktrace

    RUN zypper clean --all

    Build the image and push

    # assume you are in the directory of Dockerfile
    $ docker build -t harvester/toolbox:dev .
    .
    .
    .
    naming to docker.io/harvester/toolbox:dev ...
    $ docker push harvester/toolbox:dev
    .
    .
    d4b76d0683d4: Pushed
    a605baa225e2: Pushed
    9e9058bdf63c: Layer already exists

    After you build and push the image, you can run the toolbox using this image to trace storage performance.

    Run the toolbox

    # use `privileged` flag only when you needed. blktrace need debugfs, so I add extra mountpoint.
    docker run -it --privileged -v /sys/kernel/debug/:/sys/kernel/debug/ --rm harvester/toolbox:dev bash

    # test blktrace
    6ffa8eda3aaf:/ $ blktrace -d /dev/nvme0n1 -o - | blkparse -i -
    259,0 10 3414 0.020814875 34084 Q WS 2414127984 + 8 [fio]
    259,0 10 3415 0.020815190 34084 G WS 2414127984 + 8 [fio]
    259,0 10 3416 0.020815989 34084 C WS 3206896544 + 8 [0]
    259,0 10 3417 0.020816652 34084 C WS 2140319184 + 8 [0]
    259,0 10 3418 0.020817992 34084 P N [fio]
    259,0 10 3419 0.020818227 34084 U N [fio] 1
    259,0 10 3420 0.020818437 34084 D WS 2414127984 + 8 [fio]
    259,0 10 3421 0.020821826 34084 Q WS 1743934904 + 8 [fio]
    259,0 10 3422 0.020822150 34084 G WS 1743934904 + 8 [fio]

    - + \ No newline at end of file diff --git a/kb/page/2/index.html b/kb/page/2/index.html index cede015f..5bde303d 100644 --- a/kb/page/2/index.html +++ b/kb/page/2/index.html @@ -9,7 +9,7 @@ Harvester HCI knowledge base | The open-source hyperconverged infrastructure solution for a cloud-native world - + @@ -17,7 +17,7 @@

    · 2 min read
    Date Huang

    NIC Naming Scheme changed after upgrading to v1.0.1

    systemd in OpenSUSE Leap 15.3 which is the base OS of Harvester is upgraded to 246.16-150300.7.39.1. In this version, systemd will enable additional naming scheme sle15-sp3 which is v238 with bridge_no_slot. When there is a PCI bridge associated with NIC, systemd will never generate ID_NET_NAME_SLOT and naming policy in /usr/lib/systemd/network/99-default.link will fallback to ID_NET_NAME_PATH. According to this change, NIC names might be changed in your Harvester nodes during the upgrade process from v1.0.0 to v1.0.1-rc1 or above, and it will cause network issues that are associated with NIC names.

    Effect Settings and Workaround

    Startup Network Configuration

    NIC name changes will need to update the name in /oem/99_custom.yaml. You could use migration script to change the NIC names which are associated with a PCI bridge.

    tip

    You could find an identical machine to test naming changes before applying the configuration to production machines

    You could simply execute the script with root account in v1.0.0 via

    # python3 udev_v238_sle15-sp3.py

    It will output the patched configuration to the screen and you could compare it to the original one to ensure there is no exception. (e.g. We could use vimdiff to check the configuration)

    # python3 udev_v238_sle15-spe3.py > /oem/test
    # vimdiff /oem/test /oem/99_custom.yaml

    After checking the result, we could execute the script with --really-want-to-do to override the configuration. It will also back up the original configuration file with a timestamp before patching it.

    # python3 udev_v238_sle15-sp3.py --really-want-to-do

    Harvester VLAN Network Configuration

    If your VLAN network is associated with NIC name directly without bonding, you will need to migrate ClusterNetwork and NodeNetwork with the previous section together.

    note

    If your VLAN network is associated with the bonding name in /oem/99_custom.yaml, you could skip this section.

    Modify ClusterNetworks

    You need to modify ClusterNetworks via

    $ kubectl edit clusternetworks vlan

    search this pattern

    config:
    defaultPhysicalNIC: <Your NIC name>

    and change to new NIC name

    Modify NodeNetworks

    You need to modify NodeNetworks via

    $ kubectl edit nodenetworks <Node name>-vlan

    search this pattern

    spec:
    nic: <Your NIC name>

    and change to new NIC name

    · 4 min read
    Date Huang

    What is the default behavior of a VM with multiple NICs

    In some scenarios, you'll setup two or more NICs in your VM to serve different networking purposes. If all networks are setup by default with DHCP, you might get random connectivity issues. And while it might get fixed after rebooting the VM, it still will lose connection randomly after some period.

    How-to identify connectivity issues

    In a Linux VM, you can use commands from the iproute2 package to identify the default route.

    In your VM, execute the following command:

    ip route show default
    tip

    If you get the access denied error, please run the command using sudo

    The output of this command will only show the default route with the gateway and VM IP of the primary network interface (eth0 in the example below).

    default via <Gateway IP> dev eth0 proto dhcp src <VM IP> metric 100

    Here is the full example:

    $ ip route show default
    default via 192.168.0.254 dev eth0 proto dhcp src 192.168.0.100 metric 100

    However, if the issue covered in this KB occurs, you'll only be able to connect to the VM via the VNC or serial console.

Once connected, you can run the same command again:

    $ ip route show default

    However, this time you'll get a default route with an incorrect gateway IP. For example:

    default via <Incorrect Gateway IP> dev eth0 proto dhcp src <VM's IP> metric 100

    Why do connectivity issues occur randomly

In a standard setup, cloud-based VMs typically use DHCP to configure their NICs. DHCP assigns an IP address and a gateway to each NIC, and a default route to the gateway IP is also added so that you can use that IP to connect to the VM.

However, Linux distributions start multiple DHCP clients at the same time and have no priority system. If two or more NICs are configured with DHCP, the clients race to configure the default route, and depending on the distribution's DHCP scripts, there is no guarantee which default route ends up being used.

Because the default route might change on every DHCP renewal or after every OS reboot, this creates intermittent network connectivity issues.
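As an illustration only (a sketch reusing the example addresses from this article, with 10.0.0.0/24 assumed to be the storage subnet), a VM that lost the race might end up with a default route that points at the storage gateway instead of the service gateway:

$ ip route show default
default via 10.0.0.254 dev eth1 proto dhcp src 10.0.0.100 metric 100

In that state, traffic meant for the service network is sent towards the storage gateway, and the VM becomes unreachable from the service side.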

    How to avoid the random connectivity issues

    You can easily avoid these connectivity issues by having only one NIC attached to the VM and having only one IP and one gateway configured.

    However, for VMs in more complex infrastructures, it is often not possible to use just one NIC. For example, if your infrastructure has a storage network and a service network. For security reasons, the storage network will be isolated from the service network and have a separate subnet. In this case, you must have two NICs to connect to both the service and storage networks.

    You can choose a solution below that meets your requirements and security policy.

    Disable DHCP on secondary NIC

As mentioned above, the problem is caused by a race condition between two DHCP clients. One solution is to disable DHCP on all NICs and configure them with static IPs only. Alternatively, you can configure only the secondary NIC with a static IP and keep DHCP enabled on the primary NIC.

    1. To configure the primary NIC with a static IP (eth0 in this example), you can edit the file /etc/sysconfig/network/ifcfg-eth0 with the following values:
    BOOTPROTO='static'
    IPADDR='192.168.0.100'
    NETMASK='255.255.255.0'

Alternatively, if you want to keep the primary NIC (eth0 in this example) configured using DHCP, use the following values instead:

    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
2. You need to configure the default route by editing the file /etc/sysconfig/network/ifroute-eth0 (if you configured the primary NIC using DHCP, skip this step):
    # Destination  Dummy/Gateway  Netmask  Interface
    default 192.168.0.254 - eth0
    warning

Do not configure another default route for your secondary NIC.

3. Finally, configure a static IP for the secondary NIC by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='static'
    IPADDR='10.0.0.100'
    NETMASK='255.255.255.0'

    Cloud-Init config

network:
  version: 1
  config:
    - type: physical
      name: eth0
      subnets:
        - type: dhcp
    - type: physical
      name: eth1
      subnets:
        - type: static
          address: 10.0.0.100/24

    Disable secondary NIC default route from DHCP

If your secondary NIC must get its IP from DHCP, you need to disable the default route configuration for that NIC.

    1. Confirm that the primary NIC configures its default route in the file /etc/sysconfig/network/ifcfg-eth0:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
2. Disable the secondary NIC default route configuration by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='no'

    Cloud-Init config

This solution is not directly available in Cloud-Init, which does not provide an option to disable the default route for a DHCP-configured interface.
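As a possible workaround (a sketch, not an official Cloud-Init feature; it assumes the guest uses the SUSE-style sysconfig files shown above), you can let Cloud-Init write the interface configuration file directly with the write_files module. The file takes effect the next time the network service brings up eth1, for example after a reboot:

#cloud-config
write_files:
  - path: /etc/sysconfig/network/ifcfg-eth1
    permissions: "0644"
    content: |
      BOOTPROTO='dhcp'
      DHCLIENT_SET_DEFAULT_ROUTE='no'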

    · 16 min read
    PoAn Yang

    How does Harvester schedule a VM?

Harvester doesn't directly schedule a VM in Kubernetes; it relies on KubeVirt and its VirtualMachine custom resource. When a request to create a new VM is sent, a VirtualMachineInstance object is created, which in turn creates the corresponding Pod.

The whole VM creation process leverages kube-scheduler, which allows Harvester to use nodeSelector, affinity, and resource requests/limits to influence where a VM will be deployed.
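For example, the following sketch shows a nodeSelector fragment that pins a VM to a single node. It is placed under spec.template.spec of the VirtualMachine object (the node name is the example name used later in this article):

nodeSelector:
  kubernetes.io/hostname: harvester-node-1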

    How does kube-scheduler decide where to deploy a VM?

    First, kube-scheduler finds Nodes available to run a pod. After that, kube-scheduler scores each available Node by a list of plugins like ImageLocality, InterPodAffinity, NodeAffinity, etc.

Finally, kube-scheduler combines the scores from the plugin results for each Node and selects the Node with the highest score to deploy the Pod.

For example, let's say we have a three-node Harvester cluster with 6 CPU cores and 16G RAM each, and we want to deploy a VM with 1 CPU and 1G RAM (without resource overcommit).

    kube-scheduler will summarize the scores, as displayed in Table 1 below, and will select the node with the highest score, harvester-node-2 in this case, to deploy the VM.

    kube-scheduler logs
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2"

    Table 1 - kube-scheduler scores example

|                                 | harvester-node-0 | harvester-node-1 | harvester-node-2 |
|---------------------------------|------------------|------------------|------------------|
| ImageLocality                   | 54               | 54               | 54               |
| InterPodAffinity                | 0                | 0                | 0                |
| NodeResourcesLeastAllocated     | 4                | 34               | 37               |
| NodeAffinity                    | 0                | 0                | 0                |
| NodePreferAvoidPods             | 1000000          | 1000000          | 1000000          |
| PodTopologySpread               | 200              | 200              | 200              |
| TaintToleration                 | 100              | 100              | 100              |
| NodeResourcesBalancedAllocation | 0                | 45               | 46               |
| Total                           | 1000358          | 1000433          | 1000437          |

Why are VMs distributed unevenly with overcommit?

With resource overcommit, Harvester modifies the resource requests. By default, the overcommit configuration is {"cpu": 1600, "memory": 150, "storage": 200}. This means that if we request a VM with 1 CPU and 1G RAM, its resources.requests.cpu will become 62m.

note

The unit suffix m stands for "thousandth of a core."

To explain, let's take the case of CPU overcommit. 1 CPU equals 1000m CPU, and with the default overcommit configuration of "cpu": 1600, the requested CPU becomes 16 times smaller than the limit. Here is the calculation: 1000m * 100 / 1600 = 62m.
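The same calculation written out as shell arithmetic (a sketch; the integer division matches the 62m value that Harvester requests):

$ echo "$((1000 * 100 / 1600))m"
62m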

    Now, we can see how overcommitting influences kube-scheduler scores.

In this example, we use a three-node Harvester cluster with 6 cores and 16G RAM each. We will deploy two VMs with 1 CPU and 1G RAM, and we will compare the scores for the "with-overcommit" and "without-overcommit" cases.

The results in Table 2 and Table 3 can be explained as follows:

    In the "with-overcommit" case, both VMs are deployed on harvester-node-2, however in the "without-overcommit" case, the VM1 is deployed on harvester-node-2, and VM2 is deployed on harvester-node-1.

If we look at the detailed scores, we'll see the Total Score for harvester-node-2 move from 1000459 to 1000461 in the "with-overcommit" case, and from 1000437 to 1000382 in the "without-overcommit" case. This is because resource overcommit influences request-cpu and request-memory.

    In the "with-overcommit" case, the request-cpu changes from 4412m to 4474m. The difference between the two numbers is 62m, which is what we calculated above. However, in the "without-overcommit" case, we send real requests to kube-scheduler, so the request-cpu changes from 5350m to 6350m.

Finally, since most plugins give the same scores to every node except NodeResourcesBalancedAllocation and NodeResourcesLeastAllocated, only these two scores differ between the nodes.

From the results, we can see that the overcommit feature influences the final score of each Node, so VMs are distributed unevenly. Although the harvester-node-2 score for VM 2 is higher than for VM 1, the score does not always increase. In Table 4, we keep deploying VMs with 1 CPU and 1G RAM, and the harvester-node-2 score starts decreasing from the 11th VM. The behavior of kube-scheduler depends on your cluster resources and the workloads you deploy.

    kube-scheduler logs for vm1-with-overcommit
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 59,

    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 46,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=59

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2" score=1000459

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2"
    kube-scheduler logs for vm2-with-overcommit
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 64,

    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 43,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=64

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=43

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2" score=1000461

    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2"
    kube-scheduler logs for vm1-without-overcommit
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2"
    kube-scheduler logs for vm2-without-overcommit
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 0,

    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 28,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=28

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-2" score=1000382

    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1"
    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1"

    Table 2 - With Overcommit

| VM 1 / VM 2                           | harvester-node-0            | harvester-node-1          | harvester-node-2          |
|---------------------------------------|-----------------------------|---------------------------|---------------------------|
| request-cpu (m)                       | 9022 / 9022                 | 4622 / 4622               | 4412 / 4474               |
| request-memory                        | 14807289856 / 14807289856   | 5992960000 / 5992960000   | 5581918208 / 6476701696   |
| NodeResourcesBalancedAllocation Score | 0 / 0                       | 58 / 58                   | 59 / 64                   |
| NodeResourcesLeastAllocated Score     | 5 / 5                       | 43 / 43                   | 46 / 43                   |
| Other Scores                          | 1000354 / 1000354           | 1000354 / 1000354         | 1000354 / 1000354         |
| Total Score                           | 1000359 / 1000359           | 1000455 / 1000455         | 1000459 / 1000461         |

    Table 3 - Without Overcommit

| VM 1 / VM 2                           | harvester-node-0            | harvester-node-1          | harvester-node-2          |
|---------------------------------------|-----------------------------|---------------------------|---------------------------|
| request-cpu (m)                       | 9960 / 9960                 | 5560 / 5560               | 5350 / 6350               |
| request-memory                        | 15166603264 / 15166603264   | 6352273408 / 6352273408   | 5941231616 / 7195328512   |
| NodeResourcesBalancedAllocation Score | 0 / 0                       | 45 / 45                   | 46 / 0                    |
| NodeResourcesLeastAllocated Score     | 4 / 4                       | 34 / 34                   | 37 / 28                   |
| Other Scores                          | 1000354 / 1000354           | 1000354 / 1000354         | 1000354 / 1000354         |
| Total Score                           | 1000358 / 1000358           | 1000433 / 1000433         | 1000437 / 1000382         |

    Table 4

| Score | harvester-node-0 | harvester-node-1 | harvester-node-2 |
|-------|------------------|------------------|------------------|
| VM 1  | 1000359          | 1000455          | 1000459          |
| VM 2  | 1000359          | 1000455          | 1000461          |
| VM 3  | 1000359          | 1000455          | 1000462          |
| VM 4  | 1000359          | 1000455          | 1000462          |
| VM 5  | 1000359          | 1000455          | 1000463          |
| VM 6  | 1000359          | 1000455          | 1000465          |
| VM 7  | 1000359          | 1000455          | 1000466          |
| VM 8  | 1000359          | 1000455          | 1000467          |
| VM 9  | 1000359          | 1000455          | 1000469          |
| VM 10 | 1000359          | 1000455          | 1000469          |
| VM 11 | 1000359          | 1000455          | 1000465          |
| VM 12 | 1000359          | 1000455          | 1000457          |

    How to avoid uneven distribution of VMs?

    There are many plugins in kube-scheduler which we can use to influence the scores. For example, we can add the podAntiAffinity plugin to avoid VMs with the same labels being deployed on the same node.

affinity:
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - podAffinityTerm:
          labelSelector:
            matchExpressions:
              - key: harvesterhci.io/creator
                operator: Exists
          topologyKey: kubernetes.io/hostname
        weight: 100

    How to see scores in kube-scheduler?

kube-scheduler is deployed as a static pod in Harvester. The manifest is located at /var/lib/rancher/rke2/agent/pod-manifests/kube-scheduler.yaml on each management node. We can add - --v=10 to the kube-scheduler container arguments to show the score logs.

kind: Pod
metadata:
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
    - command:
        - kube-scheduler
        # ...
        - --v=10
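After the kubelet restarts the static pod with the higher verbosity, you can read the scores from the scheduler logs. A minimal way to find the pod and filter the score lines (the pod name is a placeholder that depends on your node name):

# kubectl -n kube-system get pods | grep kube-scheduler
# kubectl -n kube-system logs <kube-scheduler pod name> | grep "Plugin scored node for pod"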
diff --git a/kb/rss.xml b/kb/rss.xml

calculation_of_resource_metrics_in_harvester · Tue, 23 Jan 2024

Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

# kubectl get nodes -A -oyaml
apiVersion: v1
items:
- apiVersion: v1
  kind: Node
  metadata:
    ..
    management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
    node.alpha.kubernetes.io/ttl: "0"
    ..
    name: harv41
    resourceVersion: "2170215"
    uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
  spec:
    podCIDR: 10.52.0.0/24
    podCIDRs:
    - 10.52.0.0/24
    providerID: rke2://harv41
  status:
    addresses:
    - address: 192.168.122.141
      type: InternalIP
    - address: harv41
      type: Hostname
    allocatable:
      cpu: "10"
      devices.kubevirt.io/kvm: 1k
      devices.kubevirt.io/tun: 1k
      devices.kubevirt.io/vhost-net: 1k
      ephemeral-storage: "149527126718"
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 20464216Ki
      pods: "200"
    capacity:
      cpu: "10"
      devices.kubevirt.io/kvm: 1k
      devices.kubevirt.io/tun: 1k
      devices.kubevirt.io/vhost-net: 1k
      ephemeral-storage: 153707984Ki
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 20464216Ki
      pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

# kubectl get NodeMetrics -A -oyaml
apiVersion: v1
items:
- apiVersion: metrics.k8s.io/v1beta1
  kind: NodeMetrics
  metadata:
    ...
    name: harv41
  timestamp: "2024-01-23T12:04:44Z"
  usage:
    cpu: 891736742n
    memory: 9845008Ki
  window: 10.149s
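Because NodeMetrics objects are present, the metrics API is available, so the same usage data can also be viewed in a condensed form with kubectl top (harv41 is the node name from the output above):

# kubectl top node harv41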

    Resource Reservation

Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information in the annotations of the corresponding Node object, as shown in the output above.

          management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

    For more information, see Requests and Limits in the Kubernetes documentation.

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

    note

    Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources.

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

apiVersion: v1
items:
- apiVersion: longhorn.io/v1beta2
  kind: Node
  metadata:
    ..
    name: harv41
    namespace: longhorn-system
    ..
  spec:
    allowScheduling: true
    disks:
      default-disk-ef11a18c36b01132:
        allowScheduling: true
        diskType: filesystem
        evictionRequested: false
        path: /var/lib/harvester/defaultdisk
        storageReserved: 24220101427
        tags: []
    ..
  status:
    ..
    diskStatus:
      default-disk-ef11a18c36b01132:
        ..
        diskType: filesystem
        diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
        scheduledReplica:
          pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
          pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
          pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
          pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
          pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
        storageAvailable: 77699481600
        storageMaximum: 80733671424
        storageScheduled: 45097156608
    region: ""
    snapshotCheckStatus: {}
    zone: ""
Tags: harvester, resource metrics, reserved resource

diff --git a/kb/scan-and-repair-vm-root-filesystem/index.html b/kb/scan-and-repair-vm-root-filesystem/index.html

    Scan and Repair Root Filesystem of VirtualMachine

    · 4 min read
    Vicente Cheng

    In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: Analysis: Potential Data/Filesystem Corruption). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM's filesystem and repair it if needed.

    Stop The VM And Backup Volume

Before you scan the filesystem, it is recommended that you back up the volume first. As an example, refer to the following steps to stop the VM and back up the volume.

    • Find the target VM.

    finding the target VM

    • Stop the target VM.

    Stop the target VM

The target VM is stopped and the related volumes are detached. Now go to the Longhorn UI to back up this volume.

    • Enable Developer Tools & Features (Preferences -> Enable Developer Tools & Features).

    Preferences then enable developer mode Enable the developer mode

    • Click the button and select Edit Config to edit the config page of the VM.

    goto edit config page of VM

    • Go to the Volumes tab and select Check volume details.

    link to longhorn volume page

    • Click the dropdown menu on the right side and select 'Attach' to attach the volume again.

    attach this volume again

    • Select the attached node.

    choose the attached node

    • Check the volume attached under Volume Details and select Take Snapshot on this volume page.

    take snapshot on volume page

    • Confirm that the snapshot is ready.

    check the snapshot is ready

Now that you have completed the volume backup, you can scan the root filesystem and repair it if needed.

    Scanning the root filesystem and repairing

This section describes how to scan and repair the filesystem (e.g., XFS, EXT4) using the related tools.

    Before scanning, you need to know the filesystem's device/partition.

    • Identify the filesystem's device by checking the major and minor numbers of that device.
    1. Obtain the major and minor numbers from the listed volume information.

      In the following example, the volume name is pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

      harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al
      brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58

      The output indicates that the major and minor numbers are 8:0.

    2. Obtain the device name from the output of the lsblk command.

      harvester-node-0:~ # lsblk
      NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
      loop0 7:0 0 3G 1 loop /
      sda 8:0 0 40G 0 disk
      ├─sda1 8:1 0 2M 0 part
      ├─sda2 8:2 0 20M 0 part
      └─sda3 8:3 0 40G 0 part

      The output indicates that 8:0 are the major and minor numbers of the device named sda. Therefore, /dev/sda is related to the volume named pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

    • You should now know the filesystem's partition. In the example below, sda3 is the filesystem's partition.
    • Use the Filesystem toolbox image to scan and repair.
    # docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash

Then, from inside the toolbox container, we can scan the target device.

    XFS

When scanning an XFS filesystem, use the xfs_repair command with the -n flag (no-modify mode) and specify the problematic partition of the device.

    In the following example, /dev/sda3 is the problematic partition.

    # xfs_repair -n /dev/sda3

    To repair the corrupted partition, run the following command.

    # xfs_repair /dev/sda3

    EXT4

When scanning an EXT4 filesystem, use the e2fsck command as follows, where /dev/sde1 is the problematic partition of the device.

    # e2fsck -f /dev/sde1

    To repair the corrupted partition, run the following command.

    # e2fsck -fp /dev/sde1

After running the e2fsck command, you should also see logs related to scanning and repairing the partition. The scan and repair are successful if these logs contain no errors.

    Detach and Start VM again.

    After the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.

    • Detach the volume from the Longhorn UI.

    detach volume on longhorn UI

    • Start the related VM again from the Harvester UI.

    Start VM again

    Your VM should now work normally.

diff --git a/kb/tags/best-practices/index.html b/kb/tags/best-practices/index.html

    One post tagged with "best practices"

    View All Tags

    · 2 min read
    David Ko
    Jillian Maroket

    The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

    • SATA/NVMe SSDs or disk drives with similar performance
    • 10 Gbps network bandwidth between nodes
    • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

    The following sections outline other recommendations for achieving optimal disk performance.

    IO Performance

    • Storage network: Use a dedicated storage network to improve IO performance and stability.

    • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

    • Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage or less impact on system performance. This practice is especially beneficial to data-intensive applications.

    • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags (a StorageClass sketch follows this list).

    • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes.

      For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

      For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.
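
    The list above mentions StorageClasses with data locality and storage tags. The following is a minimal sketch of such a Longhorn StorageClass, assuming a hypothetical class name longhorn-fast and a hypothetical disk tag ssd; numberOfReplicas, dataLocality, and diskSelector are standard Longhorn StorageClass parameters.

    # Minimal sketch: the class name "longhorn-fast" and the disk tag "ssd" are hypothetical examples
    apiVersion: storage.k8s.io/v1
    kind: StorageClass
    metadata:
      name: longhorn-fast
    provisioner: driver.longhorn.io
    allowVolumeExpansion: true
    parameters:
      numberOfReplicas: "2"         # replica count recommended above
      dataLocality: "best-effort"   # default data locality recommended above
      diskSelector: "ssd"           # schedule replicas only on disks tagged "ssd"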

    Space Efficiency

    • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

      For applications with replication capability, periodically delete all types of snapshots.

    Disaster Recovery

    • Recurring backups: Create recurring backup jobs for mission-critical application volumes.

    • System backup: Run periodic system backups.

    - + \ No newline at end of file diff --git a/kb/tags/calculation/index.html b/kb/tags/calculation/index.html index 327fc216..6944a8b6 100644 --- a/kb/tags/calculation/index.html +++ b/kb/tags/calculation/index.html @@ -9,13 +9,13 @@ One post tagged with "calculation" | The open-source hyperconverged infrastructure solution for a cloud-native world - +
    -

    One post tagged with "calculation"

    View All Tags

    · 3 min read
    Jian Wang

    Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

    # kubectl get nodes -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: v1
    kind: Node
    metadata:
    ..
    management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
    node.alpha.kubernetes.io/ttl: "0"
    ..
    name: harv41
    resourceVersion: "2170215"
    uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
    spec:
    podCIDR: 10.52.0.0/24
    podCIDRs:
    - 10.52.0.0/24
    providerID: rke2://harv41
    status:
    addresses:
    - address: 192.168.122.141
    type: InternalIP
    - address: harv41
    type: Hostname
    allocatable:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: "149527126718"
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"
    capacity:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: 153707984Ki
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

    # kubectl get NodeMetrics -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: metrics.k8s.io/v1beta1
    kind: NodeMetrics
    metadata:
    ...
    name: harv41
    timestamp: "2024-01-23T12:04:44Z"
    usage:
    cpu: 891736742n
    memory: 9845008Ki
    window: 10.149s

    Resource Reservation

    Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the corresponding Node object.

          management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

    For more information, see Requests and Limits in the Kubernetes documentation.
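
    To inspect these values on a running cluster, you can read the annotations directly; the following is a minimal sketch using the yq style shown elsewhere in this knowledge base and the node name harv41 from the examples above.

    # Print the aggregated pod requests recorded on the node (node name from the example above)
    kubectl get node harv41 -o yaml | yq e '.metadata.annotations["management.cattle.io/pod-requests"]'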

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

    note

    Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources (a worked example follows the sample output below).

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

    apiVersion: v1
    items:
    - apiVersion: longhorn.io/v1beta2
    kind: Node
    metadata:
    ..
    name: harv41
    namespace: longhorn-system
    ..
    spec:
    allowScheduling: true
    disks:
    default-disk-ef11a18c36b01132:
    allowScheduling: true
    diskType: filesystem
    evictionRequested: false
    path: /var/lib/harvester/defaultdisk
    storageReserved: 24220101427
    tags: []
    ..
    status:
    ..
    diskStatus:
    default-disk-ef11a18c36b01132:
    ..
    diskType: filesystem
    diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
    scheduledReplica:
    pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
    pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
    pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
    pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
    pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
    storageAvailable: 77699481600
    storageMaximum: 80733671424
    storageScheduled: 45097156608
    region: ""
    snapshotCheckStatus: {}
    zone: ""
    - +

    One post tagged with "calculation"

    View All Tags

    · 3 min read
    Jian Wang

    Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

    # kubectl get nodes -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: v1
    kind: Node
    metadata:
    ..
    management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
    node.alpha.kubernetes.io/ttl: "0"
    ..
    name: harv41
    resourceVersion: "2170215"
    uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
    spec:
    podCIDR: 10.52.0.0/24
    podCIDRs:
    - 10.52.0.0/24
    providerID: rke2://harv41
    status:
    addresses:
    - address: 192.168.122.141
    type: InternalIP
    - address: harv41
    type: Hostname
    allocatable:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: "149527126718"
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"
    capacity:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: 153707984Ki
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

    # kubectl get NodeMetrics -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: metrics.k8s.io/v1beta1
    kind: NodeMetrics
    metadata:
    ...
    name: harv41
    timestamp: "2024-01-23T12:04:44Z"
    usage:
    cpu: 891736742n
    memory: 9845008Ki
    window: 10.149s

    Resource Reservation

    Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the corresponding Node object.

          management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

    For more information, see Requests and Limits in the Kubernetes documentation.

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

    note

    Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources.

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

    apiVersion: v1
    items:
    - apiVersion: longhorn.io/v1beta2
    kind: Node
    metadata:
    ..
    name: harv41
    namespace: longhorn-system
    ..
    spec:
    allowScheduling: true
    disks:
    default-disk-ef11a18c36b01132:
    allowScheduling: true
    diskType: filesystem
    evictionRequested: false
    path: /var/lib/harvester/defaultdisk
    storageReserved: 24220101427
    tags: []
    ..
    status:
    ..
    diskStatus:
    default-disk-ef11a18c36b01132:
    ..
    diskType: filesystem
    diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
    scheduledReplica:
    pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
    pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
    pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
    pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
    pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
    storageAvailable: 77699481600
    storageMaximum: 80733671424
    storageScheduled: 45097156608
    region: ""
    snapshotCheckStatus: {}
    zone: ""
    + \ No newline at end of file diff --git a/kb/tags/ceph/index.html b/kb/tags/ceph/index.html index f515a5e4..ab843888 100644 --- a/kb/tags/ceph/index.html +++ b/kb/tags/ceph/index.html @@ -9,13 +9,13 @@ One post tagged with "ceph" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "ceph"

    View All Tags

    · 4 min read
    Hang Yu

    Starting with v1.2.0, Harvester offers the capability to install a Container Storage Interface (CSI) driver in your Harvester cluster. This allows you to leverage external storage for the Virtual Machine's non-system data disk, giving you the flexibility to use different drivers tailored for specific needs, whether it's for performance optimization or seamless integration with your existing in-house storage solutions.

    It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

    One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

    Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

    Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

    In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

    Install Harvester Cluster

    Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistentStatePaths section in the Harvester configuration. These paths include:

    os:
    persistent_state_paths:
    - /var/lib/rook
    - /var/lib/ceph
    modules:
    - rbd
    - nbd

    After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.

    Install Rook to Harvester

    Install Rook to the Harvester cluster by referring to Rook Quickstart.

    curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
    && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
    # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
    kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
    kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m

    Using an existing external Ceph cluster

    1. Run the Python script create-external-cluster-resources.py in the existing external Ceph cluster to create all users and keys.
    # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
    curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
    python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash
    2. Copy the Bash output.

    Example output:

    export NAMESPACE=rook-ceph-external
    export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
    export ROOK_EXTERNAL_USERNAME=client.healthchecker
    export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
    export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
    export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
    export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
    export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
    export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
    export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
    export MONITORING_ENDPOINT=192.168.5.99
    export MONITORING_ENDPOINT_PORT=9283
    export RBD_POOL_NAME=test
    export RGW_POOL_PREFIX=default
    3. Consume the external Ceph cluster resources on the Harvester cluster.
    # Paste the above output from create-external-cluster-resources.py into import-env.sh
    vim import-env.sh
    source import-env.sh
    # this script will create a StorageClass ceph-rbd
    source import-external-cluster.sh
    kubectl apply -f common-external.yaml
    kubectl apply -f cluster-external.yaml
    # wait for all pods to become Ready
    watch 'kubectl --namespace rook-ceph get pods'
    4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
    cat >./csi/rbd/snapshotclass-external.yaml <<EOF
    ---
    apiVersion: snapshot.storage.k8s.io/v1
    kind: VolumeSnapshotClass
    metadata:
    name: csi-rbdplugin-snapclass-external
    driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
    parameters:
    clusterID: rook-ceph-external # namespace:cluster
    csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
    csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
    deletionPolicy: Delete
    EOF

    kubectl apply -f ./csi/rbd/snapshotclass-external.yaml

    Configure Harvester Cluster

    Before you can make use of Harvester's Backup & Snapshot features, you need to set up some essential configurations through the Harvester csi-driver-config setting. To set up these configurations, follow these steps:

    1. Login to the Harvester UI, then navigate to Advanced > Settings.
    2. Find and select csi-driver-config, and then click on the > Edit Setting to access the configuration options.
    3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
    4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
    5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.

    csi-driver-config-external

    Use Rook Ceph in Harvester

    After successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named rook-ceph-block for the internal Ceph cluster or named ceph-rbd for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster's storage capabilities.
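
    For example, a data volume backed by the external cluster could be requested with a PersistentVolumeClaim similar to the following minimal sketch; the PVC name, namespace, and size are hypothetical, and in practice you would usually create the volume from the Harvester UI and simply select ceph-rbd as the StorageClass.

    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: vm-data-disk-example   # hypothetical name
      namespace: default
    spec:
      accessModes:
        - ReadWriteMany            # RWX raw block volumes allow VM live migration
      volumeMode: Block            # Harvester attaches block volumes to VMs as data disks
      storageClassName: ceph-rbd   # StorageClass created by import-external-cluster.sh
      resources:
        requests:
          storage: 10Gi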

    With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.

    rook-ceph-volume-external

    rook-ceph-vm-external

    - + \ No newline at end of file diff --git a/kb/tags/cloud-provider/index.html b/kb/tags/cloud-provider/index.html index 8211c4f9..b36644b7 100644 --- a/kb/tags/cloud-provider/index.html +++ b/kb/tags/cloud-provider/index.html @@ -9,13 +9,13 @@ One post tagged with "cloud provider" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "cloud provider"

    View All Tags

    · 3 min read
    Canwu Yao

    With the release of Harvester v1.2.0, a new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

    With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

    image

    However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs.

    This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

    Prerequisites

    • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

    • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

    Steps to Keep Load Balancer IP

    1. Execute the following script before upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
      • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
      • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
      • <guest-cluster-name>: Name of your guest cluster.
      • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

      The script copies the DHCP information to the service annotations and modifies the IP pool allocation history to ensure that the load balancer IPs remain unchanged.

      image

      After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

      apiVersion: v1
      kind: Service
      metadata:
      annotations:
      kube-vip.io/hwaddr: 00:00:6c:4f:18:68
      kube-vip.io/requestedIP: 172.19.105.215
      name: lb0
      namespace: default

      For the load balancer service in pool mode, the IP pool allocation history is updated with the new load balancer name. For example:

      apiVersion: loadbalancer.harvesterhci.io/v1beta1
      kind: IPPool
      metadata:
      name: default
      spec:
      ...
      status:
      allocatedHistory:
      192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replaced with the new load balancer name
    2. Add a network selector for the pool.

      For example, the following cluster is under the VM network default/mgmt-untagged. The network selector should be default/mgmt-untagged.

      image

      image

    3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

      image

    4. Execute the script after upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

      image

      In this step, the script wraps the operations needed to upgrade the Harvester cloud provider and set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers are created with the unchanged IPs.

    - + \ No newline at end of file diff --git a/kb/tags/configuration/index.html b/kb/tags/configuration/index.html index 79a0b822..26b6cce2 100644 --- a/kb/tags/configuration/index.html +++ b/kb/tags/configuration/index.html @@ -9,13 +9,13 @@ One post tagged with "configuration" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "configuration"

    View All Tags

    · 11 min read
    Jian Wang

    In Harvester, the VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

    The VM Live Migration process finishes smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

    A big part of the following content is copied from the KubeVirt document https://kubevirt.io/user-guide/operations/live_migration/; some content and formatting are adjusted to fit this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select > Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

    After you select Apply, a VirtualMachineInstanceMigration CRD object is created, and the related controller/operator starts the migration process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachineInstanceMigration
    metadata:
    name: migration-job
    spec:
    vmiName: new-vm

    Under the hood, the open source projects KubeVirt, Libvirt, QEMU, and others perform most of the VM Live Migration work (see References below).

    Migration Status Reporting

    When a virtual machine instance (VMI) is started, it is also calculated whether the machine is live migratable. The result is stored in VMI.status.conditions. The calculation can be based on multiple parameters of the VMI; however, at the moment, it is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.

    The reported Migration Method is also calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

    Status:
    Conditions:
    Status: True
    Type: LiveMigratable
    Migration Method: BlockMigration
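
    To check this condition from the command line, you can query the VMI directly; a minimal sketch, assuming the VMI new-vm from the earlier example:

    # Print the status of the LiveMigratable condition for the VMI "new-vm"
    kubectl get vmi new-vm -o jsonpath='{.status.conditions[?(@.type=="LiveMigratable")].status}'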

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

    Migration State:
    Completed: true
    End Timestamp: 2019-03-29T03:37:52Z
    Migration Config:
    Completion Timeout Per GiB: 800
    Progress Timeout: 150
    Migration UID: c64d4898-51d3-11e9-b370-525500d15501
    Source Node: node02
    Start Timestamp: 2019-03-29T04:02:47Z
    Target Direct Migration Node Ports:
    35001: 0
    41068: 49152
    38284: 49153
    Target Node: node01
    Target Node Address: 10.128.0.46
    Target Node Domain Detected: true
    Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

    VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the case, however, the migration might get stuck and be unable to progress.

    The main factor that affects migrations from the guest perspective is its dirty rate, which is the rate at which the VM dirties memory. Guests with a high dirty rate cause a race during migration: on the one hand, memory is transferred continuously to the target, and on the other, the same memory keeps getting dirtied by the guest. In such scenarios, consider using more advanced migration strategies. Refer to Understanding different migration strategies for more details.

    There are 3 VM Live Migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

    It works as follows:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
    3. The guest starts executing on the target VM.
    4. The source VM is removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

    However, in some cases migrations might not converge easily; that is, by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (which is the VM the guest executes on). There are many reasons for migrations to fail to converge, such as a high dirty rate or low resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.

    VM Live Migration Strategy: Post-copy

    Post-copy migrations work as follows:

    1. The target VM is created.
    2. The guest is being run on the target VM.
    3. The source starts sending chunks of VM state (mostly memory) to the target.
    4. When the guest, running on the target VM, accesses memory:
       1. If the memory exists on the target VM, the guest can access it.
       2. Otherwise, the target VM asks for a chunk of memory from the source VM.
    5. Once all of the memory state is updated at the target VM, the source VM is removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

    • The same memory chunk is never transferred twice. This is possible because, with post-copy, it doesn't matter that a page has been dirtied, since the guest is already running on the target VM.
    • This means that a high dirty-rate has much less effect.
    • Consumes less network bandwidth.

    Disadvantages:

    • When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest's state, but some other parts of it may still be updated only at the source VM. This situation is generally dangerous because, for example, if either the target or the source VM crashes, the state cannot be recovered.
    • Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to wait for a lot of memory to arrive in a short period of time.
    • Slower than pre-copy in most cases.
    • Harder to cancel a migration.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

    Since a high dirty rate is usually the most significant factor preventing migrations from converging, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or only negligibly. But if the migration does not converge fast enough, the CPU is throttled more and more as time goes on.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

    Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process non-locked pages and blocks are being copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than these are being copied.

    Completion Time

    In some cases the virtual machine can write to different memory pages / disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, the live migration is aborted if it runs for a long period of time. The timeout is calculated based on the size of the VMI, its memory, and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time allowed per GiB of data before the migration is aborted. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.

    Progress Timeout

    A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

    KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, only 5 migrations can run in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.

    You can change these values in the kubevirt CR:

        apiVersion: kubevirt.io/v1
    kind: Kubevirt
    metadata:
    name: kubevirt
    namespace: kubevirt
    spec:
    configuration:
    migrations:
    parallelMigrationsPerCluster: 5
    parallelOutboundMigrationsPerNode: 2
    bandwidthPerMigration: 64Mi
    completionTimeoutPerGiB: 800
    progressTimeout: 150
    disableTLS: false
    nodeDrainTaintKey: "kubevirt.io/drain"
    allowAutoConverge: false ---------------------> related to: Auto-converge
    allowPostCopy: false -------------------------> related to: Post-copy
    unsafeMigrationOverride: false

    Remember that most of these configurations can be overridden and fine-tuned to a specified group of VMs. For more information, please refer to the Migration Policies section below.

    Migration Policies

    Migration policies provide a new way of applying migration configurations to Virtual Machines. The policies can refine the KubeVirt CR's MigrationConfiguration, which sets the cluster-wide migration configurations. This way, the cluster-wide settings serve as defaults that the migration policy can refine (i.e., change, remove, or add).

    Remember that migration policies are currently at version v1alpha1. This means that the API is not fully stable yet and may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
    allowAutoConverge: true
    bandwidthPerMigration: 217Ki
    completionTimeoutPerGiB: 23
    allowPostCopy: false

    All the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR's MigrationConfiguration. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configurations.

    Matching Policies to VMs

    Next in the spec are the selectors that define the group of VMs to which the policy applies. The options are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
    selectors:
    namespaceSelector:
    hpc-workloads: true # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
    selectors:
    virtualMachineInstanceSelector:
    workload-type: db # Matches a key and a value
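
    For the policy above to match a VM, the label must be present on the VMI itself; for VMs managed through a VirtualMachine object, the label is typically set in the instance template. A minimal sketch (the VM name db-vm is hypothetical):

    apiVersion: kubevirt.io/v1
    kind: VirtualMachine
    metadata:
      name: db-vm
    spec:
      template:
        metadata:
          labels:
            workload-type: db   # matched by the virtualMachineInstanceSelector above
        # ... rest of the VMI template spec unchanged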

    References

    Documents

    Libvirt Guest Migration

    Libvirt has a chapter describing the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

    The VM Live Migration related configuration options are passed to each layer correspondingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

    ...
    import "libvirt.org/go/libvirt"

    ...

    func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
    ...
    if options.AllowAutoConverge {
    migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
    }
    if options.AllowPostCopy {
    migrateFlags |= libvirt.MIGRATE_POSTCOPY
    }
    ...
    }

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

    const (
    ...
    MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
    MIGRATE_RDMA_PIN_ALL = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
    MIGRATE_POSTCOPY = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
    ...
    )

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

        /* Enable algorithms that ensure a live migration will eventually converge.
    * This usually means the domain will be slowed down to make sure it does
    * not change its memory faster than a hypervisor can transfer the changed
    * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
    * parameters can be used to tune the algorithm.
    *
    * Since: 1.2.3
    */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
    * migration. However, the migration will start normally and
    * virDomainMigrateStartPostCopy needs to be called to switch it into the
    * post-copy mode. See virDomainMigrateStartPostCopy for more details.
    *
    * Since: 1.3.3
    */
    VIR_MIGRATE_POSTCOPY = (1 << 15),
    - + \ No newline at end of file diff --git a/kb/tags/container/index.html b/kb/tags/container/index.html index 478b3aac..9ad89f29 100644 --- a/kb/tags/container/index.html +++ b/kb/tags/container/index.html @@ -9,13 +9,13 @@ One post tagged with "container" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "container"

    View All Tags

    · 2 min read
    Vicente Cheng

    Harvester OS is designed as an immutable operating system, which means you cannot directly install additional packages on it. While there is a way to install packages, doing so is strongly discouraged because it may lead to system instability.

    If you only want to debug the system, the preferred way is to package a toolbox image with all the needed packages.

    This article shares how to package your toolbox image and how to install any packages on the toolbox image that help you debug the system.

    For example, if you want to analyze a storage performance issue, you can install blktrace on the toolbox image.

    Create a Dockerfile

    FROM opensuse/leap:15.4

    # Install blktrace
    RUN zypper in -y \
    blktrace

    RUN zypper clean --all

    Build the image and push

    # assume you are in the directory of Dockerfile
    $ docker build -t harvester/toolbox:dev .
    .
    .
    .
    naming to docker.io/harvester/toolbox:dev ...
    $ docker push harvester/toolbox:dev
    .
    .
    d4b76d0683d4: Pushed
    a605baa225e2: Pushed
    9e9058bdf63c: Layer already exists

    After you build and push the image, you can run the toolbox using this image to trace storage performance.

    Run the toolbox

    # Use the `privileged` flag only when needed. blktrace needs debugfs, so an extra mount point is added.
    docker run -it --privileged -v /sys/kernel/debug/:/sys/kernel/debug/ --rm harvester/toolbox:dev bash

    # test blktrace
    6ffa8eda3aaf:/ $ blktrace -d /dev/nvme0n1 -o - | blkparse -i -
    259,0 10 3414 0.020814875 34084 Q WS 2414127984 + 8 [fio]
    259,0 10 3415 0.020815190 34084 G WS 2414127984 + 8 [fio]
    259,0 10 3416 0.020815989 34084 C WS 3206896544 + 8 [0]
    259,0 10 3417 0.020816652 34084 C WS 2140319184 + 8 [0]
    259,0 10 3418 0.020817992 34084 P N [fio]
    259,0 10 3419 0.020818227 34084 U N [fio] 1
    259,0 10 3420 0.020818437 34084 D WS 2414127984 + 8 [fio]
    259,0 10 3421 0.020821826 34084 Q WS 1743934904 + 8 [fio]
    259,0 10 3422 0.020822150 34084 G WS 1743934904 + 8 [fio]

    - + \ No newline at end of file diff --git a/kb/tags/csi/index.html b/kb/tags/csi/index.html index 4fe2ae81..81e1e474 100644 --- a/kb/tags/csi/index.html +++ b/kb/tags/csi/index.html @@ -9,13 +9,13 @@ One post tagged with "csi" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "csi"

    View All Tags

    · 4 min read
    Hang Yu

    Starting with v1.2.0, Harvester offers the capability to install a Container Storage Interface (CSI) driver in your Harvester cluster. This allows you to leverage external storage for the Virtual Machine's non-system data disk, giving you the flexibility to use different drivers tailored for specific needs, whether it's for performance optimization or seamless integration with your existing in-house storage solutions.

    It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

    One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

    Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

    Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

    In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

    Install Harvester Cluster

    Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistentStatePaths section in the Harvester configuration. These paths include:

    os:
    persistent_state_paths:
    - /var/lib/rook
    - /var/lib/ceph
    modules:
    - rbd
    - nbd

    After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.

    Install Rook to Harvester

    Install Rook to the Harvester cluster by referring to Rook Quickstart.

    curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
    && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
    # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
    kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
    kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m

    Using an existing external Ceph cluster

    1. Run the Python script create-external-cluster-resources.py in the existing external Ceph cluster to create all users and keys.
    # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
    curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
    python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash
    2. Copy the Bash output.

    Example output:

    export NAMESPACE=rook-ceph-external
    export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
    export ROOK_EXTERNAL_USERNAME=client.healthchecker
    export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
    export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
    export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
    export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
    export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
    export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
    export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
    export MONITORING_ENDPOINT=192.168.5.99
    export MONITORING_ENDPOINT_PORT=9283
    export RBD_POOL_NAME=test
    export RGW_POOL_PREFIX=default
    3. Consume the external Ceph cluster resources on the Harvester cluster.
    # Paste the above output from create-external-cluster-resources.py into import-env.sh
    vim import-env.sh
    source import-env.sh
    # this script will create a StorageClass ceph-rbd
    source import-external-cluster.sh
    kubectl apply -f common-external.yaml
    kubectl apply -f cluster-external.yaml
    # wait for all pods to become Ready
    watch 'kubectl --namespace rook-ceph get pods'
    4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
    cat >./csi/rbd/snapshotclass-external.yaml <<EOF
    ---
    apiVersion: snapshot.storage.k8s.io/v1
    kind: VolumeSnapshotClass
    metadata:
    name: csi-rbdplugin-snapclass-external
    driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
    parameters:
    clusterID: rook-ceph-external # namespace:cluster
    csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
    csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
    deletionPolicy: Delete
    EOF

    kubectl apply -f ./csi/rbd/snapshotclass-external.yaml

    Configure Harvester Cluster

    Before you can make use of Harvester's Backup & Snapshot features, you need to set up some essential configurations through the Harvester csi-driver-config setting. To set up these configurations, follow these steps:

    1. Login to the Harvester UI, then navigate to Advanced > Settings.
    2. Find and select csi-driver-config, and then click on the > Edit Setting to access the configuration options.
    3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
    4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
    5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.

    csi-driver-config-external

    Use Rook Ceph in Harvester

    After successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named rook-ceph-block for the internal Ceph cluster or named ceph-rbd for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster's storage capabilities.

    With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.

    rook-ceph-volume-external

    rook-ceph-vm-external

    - + \ No newline at end of file diff --git a/kb/tags/debug/index.html b/kb/tags/debug/index.html index 716eb064..626dee0f 100644 --- a/kb/tags/debug/index.html +++ b/kb/tags/debug/index.html @@ -9,13 +9,13 @@ One post tagged with "debug" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "debug"

    View All Tags

    · 2 min read
    Vicente Cheng

    Harvester OS is designed as an immutable operating system, which means you cannot directly install additional packages on it. While there is a way to install packages, doing so is strongly discouraged because it may lead to system instability.

    If you only want to debug the system, the preferred way is to package a toolbox image with all the needed packages.

    This article shares how to package your toolbox image and how to install any packages on the toolbox image that help you debug the system.

    For example, if you want to analyze a storage performance issue, you can install blktrace on the toolbox image.

    Create a Dockerfile

    FROM opensuse/leap:15.4

    # Install blktrace
    RUN zypper in -y \
    blktrace

    RUN zypper clean --all

    Build the image and push

    # assume you are in the directory of Dockerfile
    $ docker build -t harvester/toolbox:dev .
    .
    .
    .
    naming to docker.io/harvester/toolbox:dev ...
    $ docker push harvester/toolbox:dev
    .
    .
    d4b76d0683d4: Pushed
    a605baa225e2: Pushed
    9e9058bdf63c: Layer already exists

    After you build and push the image, you can run the toolbox using this image to trace storage performance.

    Run the toolbox

    # Use the `privileged` flag only when needed. blktrace needs debugfs, so an extra mount point is added.
    docker run -it --privileged -v /sys/kernel/debug/:/sys/kernel/debug/ --rm harvester/toolbox:dev bash

    # test blktrace
    6ffa8eda3aaf:/ $ blktrace -d /dev/nvme0n1 -o - | blkparse -i -
    259,0 10 3414 0.020814875 34084 Q WS 2414127984 + 8 [fio]
    259,0 10 3415 0.020815190 34084 G WS 2414127984 + 8 [fio]
    259,0 10 3416 0.020815989 34084 C WS 3206896544 + 8 [0]
    259,0 10 3417 0.020816652 34084 C WS 2140319184 + 8 [0]
    259,0 10 3418 0.020817992 34084 P N [fio]
    259,0 10 3419 0.020818227 34084 U N [fio] 1
    259,0 10 3420 0.020818437 34084 D WS 2414127984 + 8 [fio]
    259,0 10 3421 0.020821826 34084 Q WS 1743934904 + 8 [fio]
    259,0 10 3422 0.020822150 34084 G WS 1743934904 + 8 [fio]

    - + \ No newline at end of file diff --git a/kb/tags/disk-performance/index.html b/kb/tags/disk-performance/index.html index 0928818e..f1a69a0d 100644 --- a/kb/tags/disk-performance/index.html +++ b/kb/tags/disk-performance/index.html @@ -9,13 +9,13 @@ One post tagged with "disk performance" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "disk performance"

    View All Tags

    · 2 min read
    David Ko
    Jillian Maroket

    The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

    • SATA/NVMe SSDs or disk drives with similar performance
    • 10 Gbps network bandwidth between nodes
    • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

    The following sections outline other recommendations for achieving optimal disk performance.

    IO Performance

    • Storage network: Use a dedicated storage network to improve IO performance and stability.

    • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

    • Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage or less impact on system performance. This practice is especially beneficial to data-intensive applications.

    • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags.

    • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes.

      For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

      For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.

    Space Efficiency

    • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

      For applications with replication capability, periodically delete all types of snapshots.

    Disaster Recovery

    • Recurring backups: Create recurring backup jobs for mission-critical application volumes.

    • System backup: Run periodic system backups.

    - + \ No newline at end of file diff --git a/kb/tags/disk/index.html b/kb/tags/disk/index.html index f0556ac6..99a05c1c 100644 --- a/kb/tags/disk/index.html +++ b/kb/tags/disk/index.html @@ -9,14 +9,14 @@ One post tagged with "disk" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "disk"

    View All Tags

    · 2 min read
    Kiefer Chang

    Harvester replicates volume data across disks in a cluster. Before removing a disk, the user needs to evict replicas on the disk to other disks to preserve the volumes' configured availability. For more information about eviction in Longhorn, please check Evicting Replicas on Disabled Disks or Nodes.

    Preparation

    This document describes how to evict Longhorn disks using the kubectl command. Before that, users must ensure the environment is set up correctly. There are two recommended ways to do this:

1. Log in to any management node and switch to root (sudo -i).
2. Download the Kubeconfig file and use it locally:
  • Install the kubectl and yq programs manually.
  • Open the Harvester GUI, click Support at the bottom left of the page, and click Download KubeConfig to download the Kubeconfig file.
  • Set the Kubeconfig file's path in the KUBECONFIG environment variable, for example, export KUBECONFIG=/path/to/kubeconfig.

    Evicting replicas from a disk

    1. List Longhorn nodes (names are identical to Kubernetes nodes):

      kubectl get -n longhorn-system nodes.longhorn.io

      Sample output:

  NAME    READY   ALLOWSCHEDULING   SCHEDULABLE   AGE
  node1   True    true              True          24d
  node2   True    true              True          24d
  node3   True    true              True          24d
    2. List disks on a node. Assume we want to evict replicas of a disk on node1:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.spec.disks'

      Sample output:

  default-disk-ed7af10f5b8356be:
    allowScheduling: true
    evictionRequested: false
    path: /var/lib/harvester/defaultdisk
    storageReserved: 36900254515
    tags: []
    3. Assume disk default-disk-ed7af10f5b8356be is the target we want to evict replicas out of.

      Edit the node:

      kubectl edit -n longhorn-system nodes.longhorn.io node1 

Update these two fields and save (alternatively, see the kubectl patch sketch after these steps):

      • spec.disks.<disk_name>.allowScheduling to false
      • spec.disks.<disk_name>.evictionRequested to true

      Sample editing:

  default-disk-ed7af10f5b8356be:
    allowScheduling: false
    evictionRequested: true
    path: /var/lib/harvester/defaultdisk
    storageReserved: 36900254515
    tags: []
    4. Wait for all replicas on the disk to be evicted.

      Get current scheduled replicas on the disk:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica'

      Sample output:

      pvc-86d3d212-d674-4c64-b69b-4a2eb1df2272-r-7b422db7: 5368709120
      pvc-b06f0b09-f30c-4936-8a2a-425b993dd6cb-r-bb0fa6b3: 2147483648
      pvc-b844bcc6-3b06-4367-a136-3909251cb560-r-08d1ab3c: 53687091200
      pvc-ea6e0dff-f446-4a38-916a-b3bea522f51c-r-193ca5c6: 10737418240

      Run the command repeatedly, and the output should eventually become an empty map:

      {}

An empty map means that Longhorn has evicted all replicas on the disk to other disks.

      note

If a replica stays on the disk indefinitely, open the Longhorn GUI and check whether there is free space on the other disks.
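
    If you prefer not to edit the resource interactively, the same change from step 3 can be applied with a single merge patch. The following is a sketch using the node and disk names from the examples above; substitute your own.

    kubectl patch -n longhorn-system nodes.longhorn.io node1 --type merge \
      -p '{"spec":{"disks":{"default-disk-ed7af10f5b8356be":{"allowScheduling":false,"evictionRequested":true}}}}'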

    - + \ No newline at end of file diff --git a/kb/tags/filesystem/index.html b/kb/tags/filesystem/index.html index 98a769ec..109d69e1 100644 --- a/kb/tags/filesystem/index.html +++ b/kb/tags/filesystem/index.html @@ -9,14 +9,14 @@ One post tagged with "filesystem" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "filesystem"

    View All Tags

    · 4 min read
    Vicente Cheng

    In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: Analysis: Potential Data/Filesystem Corruption). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM's filesystem and repair it if needed.

    Stop The VM And Backup Volume

Before you scan the filesystem, it is recommended that you back up the volume first. For example, refer to the following steps to stop the VM and back up the volume.

    • Find the target VM.

    finding the target VM

    • Stop the target VM.

    Stop the target VM

The target VM is stopped and the related volumes are detached. Now go to the Longhorn UI to back up this volume.

    • Enable Developer Tools & Features (Preferences -> Enable Developer Tools & Features).

    Preferences then enable developer mode Enable the developer mode

    • Click the button and select Edit Config to edit the config page of the VM.

    goto edit config page of VM

    • Go to the Volumes tab and select Check volume details.

    link to longhorn volume page

    • Click the dropdown menu on the right side and select 'Attach' to attach the volume again.

    attach this volume again

    • Select the attached node.

    choose the attached node

    • Check the volume attached under Volume Details and select Take Snapshot on this volume page.

    take snapshot on volume page

    • Confirm that the snapshot is ready.

    check the snapshot is ready

Now that you have completed the volume backup, you need to scan and repair the root filesystem.

Scanning and repairing the root filesystem

This section describes how to scan the filesystem (for example, XFS or EXT4) using the related tools.

    Before scanning, you need to know the filesystem's device/partition.

    • Identify the filesystem's device by checking the major and minor numbers of that device.
    1. Obtain the major and minor numbers from the listed volume information.

      In the following example, the volume name is pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

      harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al
      brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58

      The output indicates that the major and minor numbers are 8:0.

    2. Obtain the device name from the output of the lsblk command.

      harvester-node-0:~ # lsblk
  NAME    MAJ:MIN RM  SIZE RO TYPE MOUNTPOINTS
  loop0     7:0    0    3G  1 loop /
  sda       8:0    0   40G  0 disk
  ├─sda1    8:1    0    2M  0 part
  ├─sda2    8:2    0   20M  0 part
  └─sda3    8:3    0   40G  0 part

      The output indicates that 8:0 are the major and minor numbers of the device named sda. Therefore, /dev/sda is related to the volume named pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

    • You should now know the filesystem's partition. In the example below, sda3 is the filesystem's partition.
    • Use the Filesystem toolbox image to scan and repair.
    # docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash

Then scan the target device as described below.

    XFS

    When scanning an XFS filesystem, use the xfs_repair command and specify the problematic partition of the device.

    In the following example, /dev/sda3 is the problematic partition.

    # xfs_repair -n /dev/sda3

    To repair the corrupted partition, run the following command.

    # xfs_repair /dev/sda3

    EXT4

When scanning an EXT4 filesystem, use the e2fsck command as follows, where /dev/sde1 is the problematic partition of the device.

    # e2fsck -f /dev/sde1

    To repair the corrupted partition, run the following command.

    # e2fsck -fp /dev/sde1

After running the e2fsck command, you should also see logs related to scanning and repairing the partition. The scan and repair are successful if these logs contain no errors.

Detach the Volume and Start the VM Again

    After the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.

    • Detach the volume from the Longhorn UI.

    detach volume on longhorn UI

    • Start the related VM again from the Harvester UI.

    Start VM again

    Your VM should now work normally.

    - + \ No newline at end of file diff --git a/kb/tags/harvester/index.html b/kb/tags/harvester/index.html index 9651b650..856cd112 100644 --- a/kb/tags/harvester/index.html +++ b/kb/tags/harvester/index.html @@ -9,12 +9,12 @@ 8 posts tagged with "harvester" | The open-source hyperconverged infrastructure solution for a cloud-native world - +
    -

    8 posts tagged with "harvester"

    View All Tags

    · 3 min read
    Jian Wang

    Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

    # kubectl get nodes -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: v1
    kind: Node
    metadata:
    ..
    management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
    node.alpha.kubernetes.io/ttl: "0"
    ..
    name: harv41
    resourceVersion: "2170215"
    uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
    spec:
    podCIDR: 10.52.0.0/24
    podCIDRs:
    - 10.52.0.0/24
    providerID: rke2://harv41
    status:
    addresses:
    - address: 192.168.122.141
    type: InternalIP
    - address: harv41
    type: Hostname
    allocatable:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: "149527126718"
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"
    capacity:
    cpu: "10"
    devices.kubevirt.io/kvm: 1k
    devices.kubevirt.io/tun: 1k
    devices.kubevirt.io/vhost-net: 1k
    ephemeral-storage: 153707984Ki
    hugepages-1Gi: "0"
    hugepages-2Mi: "0"
    memory: 20464216Ki
    pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

    # kubectl get NodeMetrics -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: metrics.k8s.io/v1beta1
    kind: NodeMetrics
    metadata:
    ...
    name: harv41
    timestamp: "2024-01-23T12:04:44Z"
    usage:
    cpu: 891736742n
    memory: 9845008Ki
    window: 10.149s

    Resource Reservation

    Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the NodeMetrics object.

          management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

    For more information, see Requests and Limits in the Kubernetes documentation.
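
    To read these aggregated values directly, you can query the annotations as they appear in the Node output shown earlier. The following is a sketch using the node name harv41 from the examples above.

    kubectl get node harv41 -o jsonpath='{.metadata.annotations.management\.cattle\.io/pod-requests}'
    kubectl get node harv41 -o jsonpath='{.metadata.annotations.management\.cattle\.io/pod-limits}'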

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

note

Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources.

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

    apiVersion: v1
    items:
    - apiVersion: longhorn.io/v1beta2
    kind: Node
    metadata:
    ..
    name: harv41
    namespace: longhorn-system
    ..
    spec:
    allowScheduling: true
    disks:
    default-disk-ef11a18c36b01132:
    allowScheduling: true
    diskType: filesystem
    evictionRequested: false
    path: /var/lib/harvester/defaultdisk
    storageReserved: 24220101427
    tags: []
    ..
    status:
    ..
    diskStatus:
    default-disk-ef11a18c36b01132:
    ..
    diskType: filesystem
    diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
    scheduledReplica:
    pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
    pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
    pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
    pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
    pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
    storageAvailable: 77699481600
    storageMaximum: 80733671424
    storageScheduled: 45097156608
    region: ""
    snapshotCheckStatus: {}
    zone: ""

    · 2 min read
    David Ko
    Jillian Maroket

    The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

    • SATA/NVMe SSDs or disk drives with similar performance
    • 10 Gbps network bandwidth between nodes
    • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

    The following sections outline other recommendations for achieving optimal disk performance.

    IO Performance

    • Storage network: Use a dedicated storage network to improve IO performance and stability.

    • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

• Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage and less impact on system performance. This practice is especially beneficial to data-intensive applications.

    • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags.

    • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes.

      For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

      For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.

    Space Efficiency

    • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

      For applications with replication capability, periodically delete all types of snapshots.

    Disaster Recovery

    • Recurring backups: Create recurring backup jobs for mission-critical application volumes.

    • System backup: Run periodic system backups.

    · 11 min read
    Jian Wang

    In Harvester, the VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

The VM Live Migration process finishes smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

A large part of the following content is copied from the KubeVirt document https://kubevirt.io/user-guide/operations/live_migration/; some content and formatting have been adjusted to fit this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select > Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

    After successfully selecting Apply, a CRD VirtualMachineInstanceMigration object is created, and the related controller/operator will start the process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

apiVersion: kubevirt.io/v1
kind: VirtualMachineInstanceMigration
metadata:
  name: migration-job
spec:
  vmiName: new-vm
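
    A quick usage sketch, assuming the manifest above is saved as migration-job.yaml and the VMI runs in the default namespace:

    kubectl apply -f migration-job.yaml -n default
    kubectl get virtualmachineinstancemigrations -n default migration-job -o yaml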

Under the hood, open source projects such as KubeVirt, Libvirt, and QEMU perform most of the VM Live Migration work (see the References section below).

    Migration Status Reporting

When a virtual machine instance (VMI) is started, the system also calculates whether the machine is live migratable. The result is stored in VMI.status.conditions. The calculation can be based on multiple parameters of the VMI; however, at the moment, it is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI are rejected.

The reported Migration Method is also calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

Status:
  Conditions:
    Status: True
    Type: LiveMigratable
  Migration Method: BlockMigration

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

Migration State:
  Completed: true
  End Timestamp: 2019-03-29T03:37:52Z
  Migration Config:
    Completion Timeout Per GiB: 800
    Progress Timeout: 150
  Migration UID: c64d4898-51d3-11e9-b370-525500d15501
  Source Node: node02
  Start Timestamp: 2019-03-29T04:02:47Z
  Target Direct Migration Node Ports:
    35001: 0
    41068: 49152
    38284: 49153
  Target Node: node01
  Target Node Address: 10.128.0.46
  Target Node Domain Detected: true
  Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, the migration should converge nicely. If this is not the case, however, the migration might get stuck and be unable to progress.

The main factor that affects a migration from the guest perspective is its dirty rate, that is, the rate at which the VM dirties memory. A guest with a high dirty rate creates a race during migration: memory is transferred continuously to the target while the guest keeps dirtying the same memory. In such scenarios, consider using a more advanced migration strategy. Refer to Understanding different migration strategies for more details.

    There are 3 VM Live Migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

It works as follows:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
3. The guest starts executing on the target VM.
4. The source VM is removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

However, in some cases migrations might not converge easily: by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (the VM the guest executes on). There are many reasons for a migration to fail to converge, such as a high dirty rate or scarce resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.

    VM Live Migration Strategy: Post-copy

Post-copy migrations work as follows:

    1. The target VM is created.
2. The guest starts running on the target VM.
    3. The source starts sending chunks of VM state (mostly memory) to the target.
4. When the guest, running on the target VM, accesses memory:
  1. If the memory exists on the target VM, the guest can access it.
  2. Otherwise, the target VM asks for a chunk of memory from the source VM.
5. Once all of the memory state is updated at the target VM, the source VM is removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

• The same memory chunk is never transferred twice. This is possible because, with post-copy, it doesn't matter that a page has been dirtied since the guest is already running on the target VM.
    • This means that a high dirty-rate has much less effect.
    • Consumes less network bandwidth.

    Disadvantages:

• When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, that memory is one part of the guest's state, but other parts of it may still be updated only at the source VM. This situation is generally dangerous because, for example, if either the target or guest VM crashes, the state cannot be recovered.
• Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to wait for a large amount of memory to be transferred in a short period of time.
• Slower than pre-copy in most cases.
    • Harder to cancel a migration.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

Since a high dirty rate is usually the most significant factor preventing a migration from converging, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or is throttled only negligibly. If the migration does not converge fast enough, the CPU is throttled more and more as time goes on.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

    Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process non-locked pages and blocks are being copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than these are being copied.

    Completion Time

In some cases the virtual machine can write to different memory pages / disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, live migration will be aborted if it is running for a long period of time. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data for the migration to complete before aborting it. A VMI with 8GiB of memory will time out after 6400 seconds.

    Progress Timeout

    A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, only 5 migrations can run in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.

    You can change these values in the kubevirt CR:

apiVersion: kubevirt.io/v1
kind: Kubevirt
metadata:
  name: kubevirt
  namespace: kubevirt
spec:
  configuration:
    migrations:
      parallelMigrationsPerCluster: 5
      parallelOutboundMigrationsPerNode: 2
      bandwidthPerMigration: 64Mi
      completionTimeoutPerGiB: 800
      progressTimeout: 150
      disableTLS: false
      nodeDrainTaintKey: "kubevirt.io/drain"
      allowAutoConverge: false    # related to: Auto-converge
      allowPostCopy: false        # related to: Post-copy
      unsafeMigrationOverride: false
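
    Instead of editing the CR, a targeted change can also be applied with a merge patch. The following is a sketch that enables auto-converge cluster-wide; the namespace and resource name are taken from the example manifest above, so check where the KubeVirt CR actually resides in your cluster before running it.

    kubectl patch kubevirt kubevirt -n kubevirt --type merge \
      -p '{"spec":{"configuration":{"migrations":{"allowAutoConverge":true}}}}'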

Remember that most of these configurations can be overridden and fine-tuned for a specific group of VMs. For more information, please refer to the Migration Policies section below.

    Migration Policies

Migration policies provide a new way of applying migration configurations to virtual machines. The policies can refine the KubeVirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that a migration policy can refine (that is, change, remove, or add to).

    Remember that migration policies are in version v1alpha1. This means that this API is not fully stable yet and that APIs may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

apiVersion: migrations.kubevirt.io/v1alpha1
kind: MigrationPolicy
spec:
  allowAutoConverge: true
  bandwidthPerMigration: 217Ki
  completionTimeoutPerGiB: 23
  allowPostCopy: false

    All the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR's MigrationConfiguration. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configurations.

    Matching Policies to VMs

    Next in the spec are the selectors defining the group of VMs to apply the policy. The options to do so are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

apiVersion: migrations.kubevirt.io/v1alpha1
kind: MigrationPolicy
spec:
  selectors:
    namespaceSelector:
      hpc-workloads: true # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

apiVersion: migrations.kubevirt.io/v1alpha1
kind: MigrationPolicy
spec:
  selectors:
    virtualMachineInstanceSelector:
      workload-type: db # Matches a key and a value
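
    The configuration and selector fields shown above can be combined in one policy. The following is a sketch; the policy name and the field values are hypothetical examples.

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    metadata:
      name: db-workloads-policy        # hypothetical name
    spec:
      allowPostCopy: true              # refine the cluster-wide default
      completionTimeoutPerGiB: 1200
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db            # applies to VMIs with this label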

    References

    Documents

    Libvirt Guest Migration

Libvirt has a chapter describing the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

    The VM Live Migration related configuration options are passed to each layer correspondingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

    ...
    import "libvirt.org/go/libvirt"

    ...

func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
    ...
    if options.AllowAutoConverge {
        migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
    }
    if options.AllowPostCopy {
        migrateFlags |= libvirt.MIGRATE_POSTCOPY
    }
    ...
}

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

    const (
    ...
    MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
    MIGRATE_RDMA_PIN_ALL = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
    MIGRATE_POSTCOPY = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
    ...
    )

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

        /* Enable algorithms that ensure a live migration will eventually converge.
    * This usually means the domain will be slowed down to make sure it does
    * not change its memory faster than a hypervisor can transfer the changed
    * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
    * parameters can be used to tune the algorithm.
    *
    * Since: 1.2.3
    */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
    * migration. However, the migration will start normally and
    * virDomainMigrateStartPostCopy needs to be called to switch it into the
    * post-copy mode. See virDomainMigrateStartPostCopy for more details.
    *
    * Since: 1.3.3
    */
    VIR_MIGRATE_POSTCOPY = (1 << 15),

    · 4 min read
    Hang Yu

Starting with v1.2.0, Harvester offers the capability to install a Container Storage Interface (CSI) driver in your Harvester cluster. This allows you to leverage external storage for the virtual machine's non-system data disks, giving you the flexibility to use different drivers tailored for specific needs, whether it's for performance optimization or seamless integration with your existing in-house storage solutions.

    It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

    One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

    Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

    Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

    In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

    Install Harvester Cluster

    Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistentStatePaths section in the Harvester configuration. These paths include:

os:
  persistent_state_paths:
    - /var/lib/rook
    - /var/lib/ceph
  modules:
    - rbd
    - nbd

    After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.

    Install Rook to Harvester

    Install Rook to the Harvester cluster by referring to Rook Quickstart.

    curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
    && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
    # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
    kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
    kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m

    Using an existing external Ceph cluster

1. Run the Python script create-external-cluster-resources.py in the existing external Ceph cluster to create all users and keys.
    # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
    curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
    python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash
2. Copy the Bash output.

    Example output:

    export NAMESPACE=rook-ceph-external
    export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
    export ROOK_EXTERNAL_USERNAME=client.healthchecker
    export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
    export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
    export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
    export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
    export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
    export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
    export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
    export MONITORING_ENDPOINT=192.168.5.99
    export MONITORING_ENDPOINT_PORT=9283
    export RBD_POOL_NAME=test
    export RGW_POOL_PREFIX=default
3. Consume the external Ceph cluster resources on the Harvester cluster.
    # Paste the above output from create-external-cluster-resources.py into import-env.sh
    vim import-env.sh
    source import-env.sh
    # this script will create a StorageClass ceph-rbd
    source import-external-cluster.sh
    kubectl apply -f common-external.yaml
    kubectl apply -f cluster-external.yaml
    # wait for all pods to become Ready
    watch 'kubectl --namespace rook-ceph get pods'
4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
cat >./csi/rbd/snapshotclass-external.yaml <<EOF
---
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
  name: csi-rbdplugin-snapclass-external
driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
parameters:
  clusterID: rook-ceph-external # namespace:cluster
  csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
deletionPolicy: Delete
EOF

    kubectl apply -f ./csi/rbd/snapshotclass-external.yaml

    Configure Harvester Cluster

    Before you can make use of Harvester's Backup & Snapshot features, you need to set up some essential configurations through the Harvester csi-driver-config setting. To set up these configurations, follow these steps:

    1. Login to the Harvester UI, then navigate to Advanced > Settings.
    2. Find and select csi-driver-config, and then click on the > Edit Setting to access the configuration options.
    3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
    4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
    5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.

    csi-driver-config-external

    Use Rook Ceph in Harvester

    After successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named rook-ceph-block for the internal Ceph cluster or named ceph-rbd for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster's storage capabilities.
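
    For example, a block-mode volume backed by the external Ceph cluster can be requested with a PersistentVolumeClaim that references the ceph-rbd StorageClass. The following is a sketch; the claim name, namespace, and size are examples.

    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: vm-data-ceph               # hypothetical name
      namespace: default
    spec:
      accessModes:
        - ReadWriteOnce
      volumeMode: Block                # block volume for use as a VM data disk
      storageClassName: ceph-rbd
      resources:
        requests:
          storage: 10Gi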

    With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.

    rook-ceph-volume-external

    rook-ceph-vm-external

    · 3 min read
    Canwu Yao

With the release of Harvester v1.2.0, a new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

    With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

    image

However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters because they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. As a workaround, users can manually upgrade the Harvester cloud provider using Helm and provide the correct cluster name after upgrading, but this would change the load balancer IPs.

    This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

    Prerequisites

    • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

    • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

    Steps to Keep Load Balancer IP

    1. Execute the following script before upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
      • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
      • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
      • <guest-cluster-name>: Name of your guest cluster.
      • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

The script copies the DHCP information to the service annotations and modifies the IP pool allocation history to ensure that the IPs remain unchanged.

      image

      After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

apiVersion: v1
kind: Service
metadata:
  annotations:
    kube-vip.io/hwaddr: 00:00:6c:4f:18:68
    kube-vip.io/requestedIP: 172.19.105.215
  name: lb0
  namespace: default

For a load balancer service in pool mode, the IP pool allocation history is updated with the new load balancer name. For example:

apiVersion: loadbalancer.harvesterhci.io/v1beta1
kind: IPPool
metadata:
  name: default
spec:
  ...
status:
  allocatedHistory:
    192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replaced with the new load balancer name
    2. Add network selector for the pool.

      For example, the following cluster is under the VM network default/mgmt-untagged. The network selector should be default/mgmt-untagged.

      image

      image

    3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

      image

    4. Execute the script after upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

      image

      In this step, the script wraps the operations to upgrade the Harvester cloud provider to set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers will be created with the unchanged IPs.

    · 7 min read

This article covers instructions for installing the NetApp Astra Trident CSI driver into a Harvester cluster, which enables NetApp storage systems to provide storage volumes usable by virtual machines running in Harvester.

    The NetApp storage will be an option in addition to the normal Longhorn storage; it will not replace Longhorn. Virtual machine images will still be stored using Longhorn.

    This has been tested with Harvester 1.2.0 and Trident v23.07.0.

    This procedure only works to access storage via iSCSI, not NFS.

    note

    3rd party storage classes (including those based on Trident) can only be used for non-boot volumes of Harvester VMs.

    Detailed Instructions

    We assume that before beginning this procedure, a Harvester cluster and a NetApp ONTAP storage system are both installed and configured for use.

    Most of these steps can be performed on any system with the helm and kubectl commands installed and network connectivity to the management port of the Harvester cluster. Let's call this your workstation. Certain steps must be performed on one or more cluster nodes themselves. The steps described below should be done on your workstation unless otherwise indicated.

    The last step (enabling multipathd) should be done on all nodes after the Trident CSI has been installed.

    Certain parameters of your installation will require modification of details in the examples in the procedure given below. Those which you may wish to modify include:

    • The namespace. trident is used as the namespace in the examples, but you may prefer to use another.
    • The name of the deployment. mytrident is used but you can change this to something else.
    • The management IP address of the ONTAP storage system
    • Login credentials (username and password) of the ONTAP storage system

    The procedure is as follows.

    1. Read the NetApp Astra Trident documentation:

      The simplest method is to install using Helm; that process is described here.

    2. Download the KubeConfig from the Harvester cluster.

      • Open the web UI for your Harvester cluster
      • In the lower left corner, click the "Support" link. This will take you to a "Harvester Support" page.
      • Click the button labeled "Download KubeConfig". This will download a your cluster config in a file called "local.yaml" by default.
      • Move this file to a convenient location and set your KUBECONFIG environment variable to the path of this file.
    3. Prepare the cluster for installation of the Helm chart.

      Before starting installation of the helm chart, special authorization must be provided to enable certain modifications to be made during the installation. +

      8 posts tagged with "harvester"

      View All Tags

      · 3 min read
      Jian Wang

      Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

      You can view resource-related metrics on the Harvester UI.

      • Hosts screen: Displays host-level metrics

        host level resources metrics

      • Dashboard screen: Displays cluster-level metrics

        cluster level resources metrics

      CPU and Memory

      The following sections describe the data sources and calculation methods for CPU and memory resources.

      • Resource capacity: Baseline data
      • Resource usage: Data source for the Used field on the Hosts screen
      • Resource reservation: Data source for the Reserved field on the Hosts screen

      Resource Capacity

      In Kubernetes, a Node object is created for each host.

      The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

      # kubectl get nodes -A -oyaml
      apiVersion: v1
      items:
      - apiVersion: v1
      kind: Node
      metadata:
      ..
      management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
      management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
      node.alpha.kubernetes.io/ttl: "0"
      ..
      name: harv41
      resourceVersion: "2170215"
      uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
      spec:
      podCIDR: 10.52.0.0/24
      podCIDRs:
      - 10.52.0.0/24
      providerID: rke2://harv41
      status:
      addresses:
      - address: 192.168.122.141
      type: InternalIP
      - address: harv41
      type: Hostname
      allocatable:
      cpu: "10"
      devices.kubevirt.io/kvm: 1k
      devices.kubevirt.io/tun: 1k
      devices.kubevirt.io/vhost-net: 1k
      ephemeral-storage: "149527126718"
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 20464216Ki
      pods: "200"
      capacity:
      cpu: "10"
      devices.kubevirt.io/kvm: 1k
      devices.kubevirt.io/tun: 1k
      devices.kubevirt.io/vhost-net: 1k
      ephemeral-storage: 153707984Ki
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 20464216Ki
      pods: "200"

      Resource Usage

      CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

      # kubectl get NodeMetrics -A -oyaml
      apiVersion: v1
      items:
      - apiVersion: metrics.k8s.io/v1beta1
      kind: NodeMetrics
      metadata:
      ...
      name: harv41
      timestamp: "2024-01-23T12:04:44Z"
      usage:
      cpu: 891736742n
      memory: 9845008Ki
      window: 10.149s

      Resource Reservation

      Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the NodeMetrics object.

            management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
      management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

      For more information, see Requests and Limits in the Kubernetes documentation.

      Storage

      Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

      Reserved Storage in Longhorn

      Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

      Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

      note

      Before changing the settings, read the Longhorn documentation carefully.

      Data Sources and Calculation

      Harvester uses the following data to calculate metrics for storage resources.

      • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

      • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

      • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

      # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

      apiVersion: v1
      items:
      - apiVersion: longhorn.io/v1beta2
      kind: Node
      metadata:
      ..
      name: harv41
      namespace: longhorn-system
      ..
      spec:
      allowScheduling: true
      disks:
      default-disk-ef11a18c36b01132:
      allowScheduling: true
      diskType: filesystem
      evictionRequested: false
      path: /var/lib/harvester/defaultdisk
      storageReserved: 24220101427
      tags: []
      ..
      status:
      ..
      diskStatus:
      default-disk-ef11a18c36b01132:
      ..
      diskType: filesystem
      diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
      scheduledReplica:
      pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
      pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
      pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
      pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
      pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
      storageAvailable: 77699481600
      storageMaximum: 80733671424
      storageScheduled: 45097156608
      region: ""
      snapshotCheckStatus: {}
      zone: ""

      · 2 min read
      David Ko
      Jillian Maroket

      The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

      • SATA/NVMe SSDs or disk drives with similar performance
      • 10 Gbps network bandwidth between nodes
      • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

      The following sections outline other recommendations for achieving optimal disk performance.

      IO Performance

      • Storage network: Use a dedicated storage network to improve IO performance and stability.

      • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

• Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage and less impact on system performance. This practice is especially beneficial to data-intensive applications.

      • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags.

      • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes.

        For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

        For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.

      Space Efficiency

      • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

        For applications with replication capability, periodically delete all types of snapshots.

      Disaster Recovery

      • Recurring backups: Create recurring backup jobs for mission-critical application volumes.

      • System backup: Run periodic system backups.

      · 11 min read
      Jian Wang

      In Harvester, the VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

The VM Live Migration process finishes smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.

      This article dives into the VM Live Migration process in more detail. There are three main parts:

      • General Process of VM Live Migration
      • VM Live Migration Strategies
      • VM Live Migration Configurations

      Related issues:

      note

A large part of the following content is copied from the KubeVirt document https://kubevirt.io/user-guide/operations/live_migration/; some content and formatting have been adjusted to fit this document.

      General Process of VM Live Migration

      Starting a Migration from Harvester UI

      1. Go to the Virtual Machines page.
      2. Find the virtual machine that you want to migrate and select > Migrate.
      3. Choose the node to which you want to migrate the virtual machine and select Apply.

      After successfully selecting Apply, a CRD VirtualMachineInstanceMigration object is created, and the related controller/operator will start the process.

      Migration CRD Object

      You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

      The example below starts a migration process for a virtual machine instance (VMI) new-vm.

      apiVersion: kubevirt.io/v1
      kind: VirtualMachineInstanceMigration
      metadata:
      name: migration-job
      spec:
      vmiName: new-vm

Under the hood, open source projects such as KubeVirt, Libvirt, and QEMU perform most of the VM Live Migration work (see the References section below).

      Migration Status Reporting

When a virtual machine instance (VMI) is started, the system also calculates whether the machine is live migratable. The result is stored in VMI.status.conditions. The calculation can be based on multiple parameters of the VMI; however, at the moment, it is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI are rejected.

The reported Migration Method is also calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

      Status:
      Conditions:
      Status: True
      Type: LiveMigratable
      Migration Method: BlockMigration

      Migration Status

      The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

      Below is an example of a successful migration.

      Migration State:
      Completed: true
      End Timestamp: 2019-03-29T03:37:52Z
      Migration Config:
      Completion Timeout Per GiB: 800
      Progress Timeout: 150
      Migration UID: c64d4898-51d3-11e9-b370-525500d15501
      Source Node: node02
      Start Timestamp: 2019-03-29T04:02:47Z
      Target Direct Migration Node Ports:
      35001: 0
      41068: 49152
      38284: 49153
      Target Node: node01
      Target Node Address: 10.128.0.46
      Target Node Domain Detected: true
      Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq

      VM Live Migration Strategies

      VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

      Understanding Different VM Live Migration Strategies

      VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the case, however, the migration might get stuck and be unable to progress.

      The main factor that affects migrations from the guest perspective is its dirty rate, which is the rate at which the VM dirties memory. Guests with a high dirty rate cause a race during migration: on the one hand, memory is transferred continuously to the target, and on the other, the same memory is dirtied again by the guest. In such scenarios, consider using more advanced migration strategies. Refer to Understanding different migration strategies for more details.

      There are three VM Live Migration strategies/policies:

      VM Live Migration Strategy: Pre-copy

      Pre-copy is the default strategy. It should be used for most cases.

      It works as follows:

      1. The target VM is created, but the guest keeps running on the source VM.
      2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
      3. The guest starts executing on the target VM.
      4. The source VM is removed.

      Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

      However, in some cases migrations might not converge easily; that is, by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (which is the VM the guest executes on). There are many reasons for migrations to fail to converge, such as a high dirty rate or low resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.

      VM Live Migration Strategy: Post-copy

      Post-copy migrations work as follows:

      1. The target VM is created.
      2. The guest starts running on the target VM.
      3. The source starts sending chunks of VM state (mostly memory) to the target.
      4. When the guest, running on the target VM, accesses memory: if the memory already exists on the target VM, the guest can access it; otherwise, the target VM asks for the missing chunk of memory from the source VM.
      5. Once all of the memory state is updated at the target VM, the source VM is removed.

      The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

      Advantages:

      • The same memory chunk is never transferred twice. This is possible because with post-copy it doesn't matter that a page has been dirtied, since the guest is already running on the target VM.
      • This means that a high dirty-rate has much less effect.
      • Consumes less network bandwidth.

      Disadvantages:

      • When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest's state, but some other parts of it may still be updated only at the source VM. This situation is generally dangerous because, for example, if either the source or the target VM crashes, the state cannot be recovered.
      • Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to wait for a large amount of memory to arrive in a short period of time.
      • Slower than pre-copy in most cases.
      • Harder to cancel a migration.

      VM Live Migration Strategy: Auto-converge

      Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

      Since a high dirty rate is usually the most significant factor preventing migrations from converging, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or only negligibly. But if the migration does not converge fast enough, the CPU is throttled more and more as time goes on.

      This technique dramatically increases the probability of the migration converging eventually.

      Observe the VM Live Migration Progress and Result

      Migration Timeouts

      Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process, non-locked pages and blocks are copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than they are copied.

      Completion Time

      In some cases, the virtual machine can write to different memory pages / disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, live migration will be aborted if it has been running for a long period of time. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data for the migration to complete before aborting it. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.

      Progress Timeout

      A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

      VM Live Migration Configurations

      Changing Cluster Wide Migration Limits

      KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, it only runs 5 migrations in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.

      You can change these values in the kubevirt CR:

      apiVersion: kubevirt.io/v1
      kind: KubeVirt
      metadata:
        name: kubevirt
        namespace: kubevirt
      spec:
        configuration:
          migrations:
            parallelMigrationsPerCluster: 5
            parallelOutboundMigrationsPerNode: 2
            bandwidthPerMigration: 64Mi
            completionTimeoutPerGiB: 800
            progressTimeout: 150
            disableTLS: false
            nodeDrainTaintKey: "kubevirt.io/drain"
            allowAutoConverge: false   # related to: Auto-converge
            allowPostCopy: false       # related to: Post-copy
            unsafeMigrationOverride: false
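
      As an illustration only, such a change could be applied with a merge patch instead of editing the full CR. This sketch follows the name and namespace shown in the example above; on a real Harvester cluster, verify where the KubeVirt CR lives before patching, and change these values with care:

      # Enable auto-converge and post-copy cluster-wide (sketch).
      kubectl patch kubevirt kubevirt -n kubevirt --type merge \
        -p '{"spec":{"configuration":{"migrations":{"allowAutoConverge":true,"allowPostCopy":true}}}}'

      # Confirm the resulting migration configuration.
      kubectl get kubevirt kubevirt -n kubevirt -o jsonpath='{.spec.configuration.migrations}'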

      Remember that most of these configurations can be overridden and fine-tuned for a specific group of VMs. For more information, please refer to the Migration Policies section below.

      Migration Policies

      Migration policies provide a new way of applying migration configurations to Virtual Machines. The policies can refine the KubeVirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that a migration policy can refine (i.e., change, remove, or add to).

      Remember that migration policies are in version v1alpha1. This means that this API is not fully stable yet and that APIs may change in the future.

      Migration Configurations

      Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

      apiVersion: migrations.kubevirt.io/v1alpha1
      kind: MigrationPolicy
      spec:
        allowAutoConverge: true
        bandwidthPerMigration: 217Ki
        completionTimeoutPerGiB: 23
        allowPostCopy: false

      All the above fields are optional. When omitted, the configuration will be applied as defined in the KubeVirt CR's MigrationConfiguration. This way, the KubeVirt CR serves as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configuration.

      Matching Policies to VMs

      Next in the spec are the selectors that define the group of VMs to which the policy applies. The options to do so are the following.

      This policy applies to the VMs in namespaces that have all the required labels:

      apiVersion: migrations.kubevirt.io/v1alpha1
      kind: MigrationPolicy
      spec:
        selectors:
          namespaceSelector:
            hpc-workloads: true # Matches a key and a value

      The policy below applies to the VMs that have all the required labels:

      apiVersion: migrations.kubevirt.io/v1alpha1
      kind: MigrationPolicy
      spec:
        selectors:
          virtualMachineInstanceSelector:
            workload-type: db # Matches a key and a value
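
      Putting the configuration and selector parts together, a complete MigrationPolicy might look like the following sketch. The policy name and parameter values are illustrative, not taken from this article; the label matches the example above:

      cat <<EOF | kubectl apply -f -
      apiVersion: migrations.kubevirt.io/v1alpha1
      kind: MigrationPolicy
      metadata:
        name: db-workload-policy   # hypothetical policy name
      spec:
        allowAutoConverge: true
        completionTimeoutPerGiB: 800
        selectors:
          virtualMachineInstanceSelector:
            workload-type: db      # same label as in the example above
      EOF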

      References

      Documents

      Libvirt Guest Migration

      Libvirt has a chapter describing the principles of VM/Guest Live Migration.

      https://libvirt.org/migration.html

      Kubevirt Live Migration

      https://kubevirt.io/user-guide/operations/live_migration/

      Source Code

      The VM Live Migration related configuration options are passed to each layer correspondingly.

      Kubevirt

      https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

      ...
      import "libvirt.org/go/libvirt"

      ...

      func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
          ...
          if options.AllowAutoConverge {
              migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
          }
          if options.AllowPostCopy {
              migrateFlags |= libvirt.MIGRATE_POSTCOPY
          }
          ...
      }

      Go Package Libvirt

      https://pkg.go.dev/libvirt.org/go/libvirt

      const (
          ...
          MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
          MIGRATE_RDMA_PIN_ALL  = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
          MIGRATE_POSTCOPY      = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
          ...
      )

      Libvirt

      https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

      /* Enable algorithms that ensure a live migration will eventually converge.
       * This usually means the domain will be slowed down to make sure it does
       * not change its memory faster than a hypervisor can transfer the changed
       * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
       * parameters can be used to tune the algorithm.
       *
       * Since: 1.2.3
       */
      VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
      ...
      /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
       * migration. However, the migration will start normally and
       * virDomainMigrateStartPostCopy needs to be called to switch it into the
       * post-copy mode. See virDomainMigrateStartPostCopy for more details.
       *
       * Since: 1.3.3
       */
      VIR_MIGRATE_POSTCOPY = (1 << 15),

      · 4 min read
      Hang Yu

      Starting with Harvester v1.2.0, it offers the capability to install a Container Storage Interface (CSI) in your Harvester cluster. This allows you to leverage external storage for the Virtual Machine's non-system data disk, giving you the flexibility to use different drivers tailored for specific needs, whether it's for performance optimization or seamless integration with your existing in-house storage solutions.

      It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

      One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

      Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

      Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

      In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

      Install Harvester Cluster

      Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistent_state_paths section in the Harvester configuration and load the required kernel modules. These paths and modules include:

      os:
        persistent_state_paths:
          - /var/lib/rook
          - /var/lib/ceph
        modules:
          - rbd
          - nbd

      After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.

      Install Rook to Harvester

      Install Rook to the Harvester cluster by referring to Rook Quickstart.

      curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
      && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
      # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
      kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
      kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m
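
      Before moving on, you may want to confirm that the operator deployed cleanly (a quick sketch; the CRD name below is the standard Rook CephCluster CRD):

      # The Rook CRDs should be registered and the operator pod Running.
      kubectl get crd cephclusters.ceph.rook.io
      kubectl -n rook-ceph get pods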

      Using an existing external Ceph cluster

      1. Run the Python script create-external-cluster-resources.py on the existing external Ceph cluster to create all users and keys.
      # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
      curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
      python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash
      2. Copy the Bash output.

      Example output:

      export NAMESPACE=rook-ceph-external
      export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
      export ROOK_EXTERNAL_USERNAME=client.healthchecker
      export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
      export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
      export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
      export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
      export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
      export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
      export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
      export MONITORING_ENDPOINT=192.168.5.99
      export MONITORING_ENDPOINT_PORT=9283
      export RBD_POOL_NAME=test
      export RGW_POOL_PREFIX=default
      3. Consume the external Ceph cluster resources on the Harvester cluster.
      # Paste the above output from create-external-cluster-resources.py into import-env.sh
      vim import-env.sh
      source import-env.sh
      # this script will create a StorageClass ceph-rbd
      source import-external-cluster.sh
      kubectl apply -f common-external.yaml
      kubectl apply -f cluster-external.yaml
      # wait for all pods to become Ready
      watch 'kubectl --namespace rook-ceph get pods'
      4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
      cat >./csi/rbd/snapshotclass-external.yaml <<EOF
      ---
      apiVersion: snapshot.storage.k8s.io/v1
      kind: VolumeSnapshotClass
      metadata:
        name: csi-rbdplugin-snapclass-external
      driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
      parameters:
        clusterID: rook-ceph-external # namespace:cluster
        csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
        csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
      deletionPolicy: Delete
      EOF

      kubectl apply -f ./csi/rbd/snapshotclass-external.yaml
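
      At this point it is worth confirming that the imported resources exist before configuring Harvester (a sketch; ceph-rbd is the StorageClass created by the import script above):

      kubectl get storageclass ceph-rbd
      kubectl get volumesnapshotclass csi-rbdplugin-snapclass-external
      kubectl --namespace rook-ceph get pods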

      Configure Harvester Cluster

      Before you can make use of Harvester's Backup & Snapshot features, you need to set up some essential configurations through the Harvester csi-driver-config setting. To set up these configurations, follow these steps:

      1. Log in to the Harvester UI, then navigate to Advanced > Settings.
      2. Find csi-driver-config, and then select > Edit Setting to access the configuration options.
      3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
      4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
      5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.

      csi-driver-config-external

      Use Rook Ceph in Harvester

      After successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named rook-ceph-block for the internal Ceph cluster or named ceph-rbd for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster's storage capabilities.
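
      Outside the UI flow described above, the same StorageClass can also be consumed with a plain PVC, for example as a data volume to attach to a VM. This is a sketch; the PVC name, namespace, and size are illustrative:

      cat <<EOF | kubectl apply -f -
      apiVersion: v1
      kind: PersistentVolumeClaim
      metadata:
        name: vm-data-ceph          # hypothetical volume name
        namespace: default
      spec:
        accessModes:
          - ReadWriteOnce
        volumeMode: Block           # VM disks are typically attached as block devices
        storageClassName: ceph-rbd  # or rook-ceph-block for an internal cluster
        resources:
          requests:
            storage: 10Gi
      EOF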

      With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.

      rook-ceph-volume-external

      rook-ceph-vm-external

      · 3 min read
      Canwu Yao

      As Harvester v1.2.0 is released, a new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

      With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

      image

      However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs.

      This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

      Prerequisites

      • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

      • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

      Steps to Keep Load Balancer IP

      1. Execute the following script before upgrading.

        curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
        • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
        • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
        • <guest-cluster-name>: Name of your guest cluster.
        • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

        The script will help users copy the DHCP information to the service annotation and modify the IP pool allocated history to make sure the IP is unchanged.

        image

        After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

        apiVersion: v1
        kind: Service
        metadata:
          annotations:
            kube-vip.io/hwaddr: 00:00:6c:4f:18:68
            kube-vip.io/requestedIP: 172.19.105.215
          name: lb0
          namespace: default

        As for load balancer services with pool mode, the IP pool allocated history will be modified to reference the new load balancer name. For example:

        apiVersion: loadbalancer.harvesterhci.io/v1beta1
        kind: IPPool
        metadata:
          name: default
        spec:
          ...
        status:
          allocatedHistory:
            192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replace with the new load balancer name
      2. Add a network selector for the pool.

        For example, the following cluster is under the VM network default/mgmt-untagged. The network selector should be default/mgmt-untagged.

        image

        image

      3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

        image

      4. Execute the script after upgrading.

        curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

        image

        In this step, the script wraps the operations to upgrade the Harvester cloud provider to set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers will be created with the unchanged IPs.
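
        As a final check, you can confirm that the service kept its address (a sketch; lb0 and default are the example service name and namespace shown earlier):

        # The external IP should match the kube-vip.io/requestedIP annotation above.
        kubectl --kubeconfig <guest-cluster-kubeconfig-path> get service lb0 -n default \
          -o jsonpath='{.status.loadBalancer.ingress[0].ip}'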

      · 7 min read

      This article covers instructions for installing the NetApp Astra Trident CSI driver into a Harvester cluster, which enables NetApp storage systems to provide storage volumes usable by virtual machines running in Harvester.

      The NetApp storage will be an option in addition to the normal Longhorn storage; it will not replace Longhorn. Virtual machine images will still be stored using Longhorn.

      This has been tested with Harvester 1.2.0 and Trident v23.07.0.

      This procedure only works to access storage via iSCSI, not NFS.

      note

      3rd party storage classes (including those based on Trident) can only be used for non-boot volumes of Harvester VMs.

      Detailed Instructions

      We assume that before beginning this procedure, a Harvester cluster and a NetApp ONTAP storage system are both installed and configured for use.

      Most of these steps can be performed on any system with the helm and kubectl commands installed and network connectivity to the management port of the Harvester cluster. Let's call this your workstation. Certain steps must be performed on one or more cluster nodes themselves. The steps described below should be done on your workstation unless otherwise indicated.

      The last step (enabling multipathd) should be done on all nodes after the Trident CSI has been installed.

      Certain parameters of your installation will require modification of details in the examples in the procedure given below. Those which you may wish to modify include:

      • The namespace. trident is used as the namespace in the examples, but you may prefer to use another.
      • The name of the deployment. mytrident is used but you can change this to something else.
      • The management IP address of the ONTAP storage system
      • Login credentials (username and password) of the ONTAP storage system

      The procedure is as follows.

      1. Read the NetApp Astra Trident documentation:

        The simplest method is to install using Helm; that process is described here.

      2. Download the KubeConfig from the Harvester cluster.

        • Open the web UI for your Harvester cluster
        • In the lower left corner, click the "Support" link. This will take you to a "Harvester Support" page.
        • Click the button labeled "Download KubeConfig". This will download your cluster config in a file called "local.yaml" by default.
        • Move this file to a convenient location and set your KUBECONFIG environment variable to the path of this file.
      3. Prepare the cluster for installation of the Helm chart.

        Before starting installation of the helm chart, special authorization must be provided to enable certain modifications to be made during the installation. This addresses the issue described here: https://github.com/NetApp/trident/issues/839

        • Put the following text into a file. For this example we'll call it authorize_trident.yaml.

          ---
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRole
          metadata:
            name: trident-operator-psa
          rules:
          - apiGroups:
            - management.cattle.io
            resources:
            - projects
            verbs:
            - updatepsa
          ---
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRoleBinding
          metadata:
            name: trident-operator-psa
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: ClusterRole
            name: trident-operator-psa
          subjects:
          - kind: ServiceAccount
            name: trident-operator
            namespace: trident
        • Apply this manifest via the command kubectl apply -f authorize_trident.yaml.

      4. Install the helm chart.

        • First you will need to add the Astra Trident Helm repository:

          helm repo add netapp-trident https://netapp.github.io/trident-helm-chart
        • Next, install the Helm chart. This example uses mytrident as the deployment name, trident as the namespace, and 23.07.0 as the version number to install:

          helm install mytrident netapp-trident/trident-operator --version 23.07.0 --create-namespace --namespace trident
        • The NetApp documentation describes variations on how you can do this.

      5. Download and extract the tridentctl command, which will be needed for the next few steps.

        This and the next few steps need to be performed while logged in to a master node of the Harvester cluster, using root access.

        cd /tmp
        curl -L -o trident-installer-23.07.0.tar.gz https://github.com/NetApp/trident/releases/download/v23.07.0/trident-installer-23.07.0.tar.gz
        tar -xf trident-installer-23.07.0.tar.gz
        cd trident-installer
      6. Install a backend.

        This part is specific to Harvester.

        1. Put the following into a text file, for example /tmp/backend.yaml

          version: 1
          backendName: default_backend_san
          storageDriverName: ontap-san-economy
          managementLIF: 172.19.97.114
          svm: default_backend
          username: admin
          password: password1234
          labels:
            name: default_backend_san

          The LIF IP address, username, and password of this file should be replaced with the management LIF and credentials for the ONTAP system.

        2. Create the backend

          ./tridentctl create backend -f /tmp/backend.yaml -n trident
        3. Check that it is created

          ./tridentctl get backend -n trident
      7. Define a StorageClass and SnapshotClass.

        1. Put the following into a file, for example /tmp/storage.yaml

          ---
          apiVersion: storage.k8s.io/v1
          kind: StorageClass
          metadata:
            name: ontap-san-economy
          provisioner: csi.trident.netapp.io
          parameters:
            selector: "name=default_backend_san"
          ---
          apiVersion: snapshot.storage.k8s.io/v1
          kind: VolumeSnapshotClass
          metadata:
            name: csi-snapclass
          driver: csi.trident.netapp.io
          deletionPolicy: Delete
        2. Apply the definitions:

          kubectl apply -f /tmp/storage.yaml
      8. Enable multipathd

        The following is required to enable multipathd. The recommended method is to "whitelist" the Trident devices using device properties rather than device naming. The properties to allow are the device vendor and product. Here is an example of what you'll want in /etc/multipath.conf:

        blacklist {
            device {
                vendor "!NETAPP"
                product "!LUN"
            }
        }
        blacklist_exceptions {
            device {
                vendor "NETAPP"
                product "LUN"
            }
        }

        This example only works if NetApp is the only storage provider in the system for which multipathd must be used. More complex environments will require more complex configuration.

        Explicitly putting that content into /etc/multipath.conf will work when you start multipathd as described below, but the change in /etc will not persist across node reboots. To solve that problem, you should add another file to /oem that will re-generate /etc/multipath.conf when the node reboots. The following example will create the /etc/multipath.conf given in the example above, but may need to be modified for your environment if you have a more complex iSCSI configuration:

        stages:
          initramfs:
            - name: "Configure multipath blacklist and whitelist"
              files:
                - path: /etc/multipath.conf
                  permissions: 0644
                  owner: 0
                  group: 0
                  content: |
                    blacklist {
                        device {
                            vendor "!NETAPP"
                            product "!LUN"
                        }
                    }
                    blacklist_exceptions {
                        device {
                            vendor "NETAPP"
                            product "LUN"
                        }
                    }

        Remember, this has to be done on every node.

      9. Enable multipathd.

        Adding the above files to /oem will take effect on the next reboot of the node; multipathd can be enabled immediately without rebooting the node using the following commands:

        systemctl enable multipathd
        systemctl start multipathd

        After the above steps, the ontap-san-economy storage class should be available when creating a volume for a Harvester VM.
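
        To double-check the installation, the following commands can be used (a sketch based on the names used in this article):

        # From your workstation: Trident pods should be Running and the StorageClass present.
        kubectl get pods -n trident
        kubectl get storageclass ontap-san-economy

        # On each node: multipathd should be active.
        systemctl status multipathd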

    · 7 min read
    Kiefer Chang

    Harvester v1.2.0 introduces a new enhancement where Longhorn system-managed components in newly-deployed clusters are automatically assigned a system-cluster-critical priority class by default. However, when upgrading your Harvester clusters from previous versions, you may notice that Longhorn system-managed components do not have any priority class set.

    This behavior is intentional and aimed at supporting zero-downtime upgrades. Longhorn does not allow changing the priority-class setting when attached volumes exist. For more details, please refer to Setting Priority Class During Longhorn Installation.

    This article explains how to manually configure priority classes for Longhorn system-managed components after upgrading your Harvester cluster, ensuring that your Longhorn components have the appropriate priority class assigned and maintaining the stability and performance of your system.

    Stop all virtual machines

    Stop all virtual machines (VMs) to detach all volumes. Please back up any work before doing this.

    1. Log in to a Harvester controller node and become root.

    2. Get all running VMs and write down their namespaces and names:

      kubectl get vmi -A

      Alternatively, you can get this information by backing up the Virtual Machine Instance (VMI) manifests with the following command:

      kubectl get vmi -A -o json > vmi-backup.json
    3. Shut down all VMs. Log in to all running VMs and shut them down gracefully (recommended). Or use the following command to send shutdown signals to all VMs:

      kubectl get vmi -A -o json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Stop ${namespace}/${name}"
        virtctl stop $name -n $namespace
      done
      note

      You can also stop all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Stop.
    4. Ensure there are no running VMs:

      Run the command:

      kubectl get vmi -A

      The above command must return:

      No resources found

    Scale down monitoring pods

    1. Scale down the Prometheus deployment. Run the following command and wait for all Prometheus pods to terminate:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like this:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      statefulset rolling update complete 0 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to terminate:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      statefulset rolling update complete 0 pods at revision alertmanager-rancher-monitoring-alertmanager-c8c459dff...
    3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to terminate:

      kubectl scale --replicas=0 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      deployment "rancher-monitoring-grafana" successfully rolled out

    Scale down vm-import-controller pods

    1. Check if the vm-import-controller addon is enabled and configured with a persistent volume with the following command:

      kubectl get pvc -n harvester-system harvester-vm-import-controller

      If the above command returns an output like this, you must scale down the vm-import-controller pod. Otherwise, you can skip the following step.

      NAME                             STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS         AGE
      harvester-vm-import-controller   Bound    pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559   200Gi      RWO            harvester-longhorn   2m53s
    2. Scale down the vm-import-controller pods with the following command:

      kubectl scale --replicas=0 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      deployment "harvester-vm-import-controller" successfully rolled out

    Set the priority-class setting

    1. Before applying the priority-class setting, you need to verify all volumes are detached. Run the following command to verify the STATE of each volume is detached:

      kubectl get volumes.longhorn.io -A

      Verify the output looks like this:

      NAMESPACE         NAME                                       STATE      ROBUSTNESS   SCHEDULED   SIZE           NODE   AGE
      longhorn-system   pvc-5743fd02-17a3-4403-b0d3-0e9b401cceed   detached   unknown                  5368709120            15d
      longhorn-system   pvc-7e389fe8-984c-4049-9ba8-5b797cb17278   detached   unknown                  53687091200           15d
      longhorn-system   pvc-8df64e54-ecdb-4d4e-8bab-28d81e316b8b   detached   unknown                  2147483648            15d
      longhorn-system   pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559   detached   unknown                  214748364800          11m
    2. Set the priority-class setting with the following command:

      kubectl patch -n longhorn-system settings.longhorn.io priority-class --patch '{"value": "system-cluster-critical"}' --type merge

      Longhorn system-managed pods will restart and then you need to check if all the system-managed components have a priority class set:

      Get the value of the priority class system-cluster-critical:

      kubectl get priorityclass system-cluster-critical

      Verify the output looks like this:

      NAME                      VALUE        GLOBAL-DEFAULT   AGE
      system-cluster-critical   2000000000   false            15d
    3. Use the following command to get pods' priority in the longhorn-system namespace:

      kubectl get pods -n longhorn-system -o custom-columns="Name":metadata.name,"Priority":.spec.priority
    4. Verify that all system-managed components' pods have the correct priority; a quick way to spot any missing ones is sketched after this list. System-managed components include:

      • csi-attacher
      • csi-provisioner
      • csi-resizer
      • csi-snapshotter
      • engine-image-ei
      • instance-manager-e
      • instance-manager-r
      • longhorn-csi-plugin
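
      To quickly spot which pods in longhorn-system do not carry the system-cluster-critical priority class, a jq filter like the following may help (a sketch; jq is already used earlier in this article). Cross-check the system-managed components listed above against its output:

      kubectl get pods -n longhorn-system -o json \
        | jq -r '.items[] | select(.spec.priorityClassName != "system-cluster-critical") | .metadata.name'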

    Scale up vm-import-controller pods

    If you scaled down the vm-import-controller pods, you must scale them up again.

    1. Scale up the vm-import-controller pod. Run the command:

      kubectl scale --replicas=1 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      Waiting for deployment "harvester-vm-import-controller" rollout to finish: 0 of 1 updated replicas are available...
      deployment "harvester-vm-import-controller" successfully rolled out
    2. Verify vm-import-controller is running using the following command:

      kubectl get pods --selector app.kubernetes.io/instance=vm-import-controller -A

      A sample output looks like this; the pod's STATUS must be Running:

      NAMESPACE          NAME                                              READY   STATUS    RESTARTS   AGE
      harvester-system   harvester-vm-import-controller-6bd8f44f55-m9k86   1/1     Running   0          4m53s

    Scale up monitoring pods

    1. Scale up the Prometheus deployment. Run the following command and wait for all Prometheus pods to roll out:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale up the AlertManager deployment. Run the following command and wait for all AlertManager pods to roll out:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision alertmanager-rancher-monitoring-alertmanager-c8bd4466c...
    3. Scale up the Grafana deployment. Run the following command and wait for all Grafana pods to roll out:

      kubectl scale --replicas=1 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      Waiting for deployment "rancher-monitoring-grafana" rollout to finish: 0 of 1 updated replicas are available...
      deployment "rancher-monitoring-grafana" successfully rolled out

    Start virtual machines

    1. Start a VM with the command:

      virtctl start $name -n $namespace

      Replace $name with the VM's name and $namespace with the VM's namespace. You can list all virtual machines with the command:

      kubectl get vms -A
      note

      You can also start all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Start.

      Alternatively, you can start all previously running VMs with the following command:

      cat vmi-backup.json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Start ${namespace}/${name}"
        virtctl start $name -n $namespace || true
      done

    · 2 min read
    Vicente Cheng

    Harvester OS is designed as an immutable operating system, which means you cannot directly install additional packages on it. While there is a way to install packages, it is strongly advised against doing so, as it may lead to system instability.

    If you only want to debug the system, the preferred way is to package a toolbox image with all the needed packages.

    This article shares how to package your toolbox image and how to install any packages on the toolbox image that help you debug the system.

    For example, if you want to analyze a storage performance issue, you can install blktrace on the toolbox image.

    Create a Dockerfile

    FROM opensuse/leap:15.4

    # Install blktrace
    RUN zypper in -y \
    blktrace

    RUN zypper clean --all

    Build the image and push

    # assume you are in the directory of Dockerfile
    $ docker build -t harvester/toolbox:dev .
    .
    .
    .
    naming to docker.io/harvester/toolbox:dev ...
    $ docker push harvester/toolbox:dev
    .
    .
    d4b76d0683d4: Pushed
    a605baa225e2: Pushed
    9e9058bdf63c: Layer already exists

    After you build and push the image, you can run the toolbox using this image to trace storage performance.

    Run the toolbox

    # Use the `privileged` flag only when needed. blktrace needs debugfs, so an extra mount point is added.
    docker run -it --privileged -v /sys/kernel/debug/:/sys/kernel/debug/ --rm harvester/toolbox:dev bash

    # test blktrace
    6ffa8eda3aaf:/ $ blktrace -d /dev/nvme0n1 -o - | blkparse -i -
    259,0 10 3414 0.020814875 34084 Q WS 2414127984 + 8 [fio]
    259,0 10 3415 0.020815190 34084 G WS 2414127984 + 8 [fio]
    259,0 10 3416 0.020815989 34084 C WS 3206896544 + 8 [0]
    259,0 10 3417 0.020816652 34084 C WS 2140319184 + 8 [0]
    259,0 10 3418 0.020817992 34084 P N [fio]
    259,0 10 3419 0.020818227 34084 U N [fio] 1
    259,0 10 3420 0.020818437 34084 D WS 2414127984 + 8 [fio]
    259,0 10 3421 0.020821826 34084 Q WS 1743934904 + 8 [fio]
    259,0 10 3422 0.020822150 34084 G WS 1743934904 + 8 [fio]


    One post tagged with "ip pool"

    View All Tags

    · 3 min read
    Canwu Yao



    One post tagged with "live migration"

    View All Tags

    · 11 min read
    Jian Wang

    In Harvester, the VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

    The VM Live Migration process is finished smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

    A big part of the following contents are copied from kubevirt document https://kubevirt.io/user-guide/operations/live_migration/, some contents/formats are adjusted to fit in this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select > Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

    After successfully selecting Apply, a CRD VirtualMachineInstanceMigration object is created, and the related controller/operator will start the process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachineInstanceMigration
    metadata:
    name: migration-job
    spec:
    vmiName: new-vm

    Under the hood, the open source projects Kubevirt, Libvirt, QEMU, ... perform most of the VM Live Migration. References.

    Migration Status Reporting

    When starting a virtual machine instance (VMI), it has also been calculated whether the machine is live migratable. The result is being stored in the VMI VMI.status.conditions. The calculation can be based on multiple parameters of the VMI, however, at the moment, the calculation is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.

    The reported Migration Method is also being calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

    Status:
    Conditions:
    Status: True
    Type: LiveMigratable
    Migration Method: BlockMigration

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

    Migration State:
    Completed: true
    End Timestamp: 2019-03-29T03:37:52Z
    Migration Config:
    Completion Timeout Per GiB: 800
    Progress Timeout: 150
    Migration UID: c64d4898-51d3-11e9-b370-525500d15501
    Source Node: node02
    Start Timestamp: 2019-03-29T04:02:47Z
    Target Direct Migration Node Ports:
    35001: 0
    41068: 49152
    38284: 49153
    Target Node: node01
    Target Node Address: 10.128.0.46
    Target Node Domain Detected: true
    Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

    VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the scenario, however, the migration might get stuck without an ability to progress.

    The main factor that affects migrations from the guest perspective is its dirty rate, which is the rate by which the VM dirties memory. Guests with high dirty rate lead to a race during migration. On the one hand, memory would be transferred continuously to the target, and on the other, the same memory would get dirty by the guest. On such scenarios, one could consider to use more advanced migration strategies. Refer to Understanding different migration strategies for more details.

    There are 3 VM Live Migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

    The way it works is as following:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
    3. The guest starts executing on the target VM. 4. The source VM is being removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

    However, on some cases migrations might not converge easily, that is, by the time the chunk of source VM state would be received by the target VM, it would already be mutated by the source VM (which is the VM the guest executes on). There are many reasons for migrations to fail converging, such as a high dirty-rate or low resources like network bandwidth and CPU. On such scenarios, see the following alternative strategies below.

    VM Live Migration Strategy: Post-copy

    The way post-copy migrations work is as following:

    1. The target VM is created.
    2. The guest is being run on the target VM.
    3. The source starts sending chunks of VM state (mostly memory) to the target.
    4. When the guest, running on the target VM, would access memory: 1. If the memory exists on the target VM, the guest can access it. 2. Otherwise, the target VM asks for a chunk of memory from the source VM.
    5. Once all of the memory state is updated at the target VM, the source VM is being removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

    • The same memory chink is never being transferred twice. This is possible due to the fact that with post-copy it doesn't matter that a page had been dirtied since the guest is already running on the target VM.
    • This means that a high dirty-rate has much less effect.
    • Consumes less network bandwidth.

    Disadvantages:

• When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest's state, but other parts of it may still be updated only on the source VM. This situation is generally dangerous because, for example, if either the source or the target VM crashes, the state cannot be recovered.
• Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to fetch a large amount of memory from the source in a short period of time.
• Slower than pre-copy in most cases.
• Harder to cancel a migration.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

Since a high dirty rate is usually the most significant reason that migrations fail to converge, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or only negligibly. If it does not, the CPU is throttled more and more as time passes.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process, non-locked pages and blocks are copied and become free for the instance to use again. For a migration to succeed, the instance is assumed to write to the free pages and blocks (pollute the pages) at a lower rate than they are copied.

    Completion Time

In some cases the virtual machine can write to different memory pages or disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, the live migration will be aborted if it runs for a long period of time. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data for the migration to complete before aborting it. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.

    Progress Timeout

    A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, only 5 migrations run in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64 MiB/s.

    You can change these values in the kubevirt CR:

    apiVersion: kubevirt.io/v1
    kind: KubeVirt
    metadata:
      name: kubevirt
      namespace: kubevirt
    spec:
      configuration:
        migrations:
          parallelMigrationsPerCluster: 5
          parallelOutboundMigrationsPerNode: 2
          bandwidthPerMigration: 64Mi
          completionTimeoutPerGiB: 800
          progressTimeout: 150
          disableTLS: false
          nodeDrainTaintKey: "kubevirt.io/drain"
          allowAutoConverge: false        # related to: Auto-converge
          allowPostCopy: false            # related to: Post-copy
          unsafeMigrationOverride: false

Remember that most of these configurations can be overridden and fine-tuned for a specific group of VMs. For more information, please refer to the Migration Policies section below.

    Migration Policies

Migration policies provide a new way of applying migration configurations to virtual machines. A policy can refine the Kubevirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that the migration policy can refine (i.e., change, remove, or add to).

    Remember that migration policies are in version v1alpha1. This means that this API is not fully stable yet and that APIs may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      allowAutoConverge: true
      bandwidthPerMigration: 217Ki
      completionTimeoutPerGiB: 23
      allowPostCopy: false

    All the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR's MigrationConfiguration. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configurations.

    Matching Policies to VMs

    Next in the spec are the selectors defining the group of VMs to apply the policy. The options to do so are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        namespaceSelector:
          hpc-workloads: true # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db # Matches a key and a value
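
Putting these pieces together, a complete MigrationPolicy that combines the configuration fields with a VMI selector might look like the following sketch. The policy name and the workload-type label are illustrative assumptions, not values defined by Harvester or KubeVirt.

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    metadata:
      name: fast-db-migrations          # hypothetical policy name
    spec:
      allowAutoConverge: true           # enable the Auto-converge strategy for matching VMIs
      completionTimeoutPerGiB: 800
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db             # applies to VMIs carrying this label

Because all configuration fields are optional, anything omitted here still falls back to the cluster-wide defaults in the Kubevirt CR's MigrationConfiguration.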

    References

    Documents

    Libvirt Guest Migration

Libvirt has a chapter describing the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

    The VM Live Migration related configuration options are passed to each layer correspondingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

    ...
    import "libvirt.org/go/libvirt"

    ...

    func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
        ...
        if options.AllowAutoConverge {
            migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
        }
        if options.AllowPostCopy {
            migrateFlags |= libvirt.MIGRATE_POSTCOPY
        }
        ...
    }

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

    const (
        ...
        MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
        MIGRATE_RDMA_PIN_ALL  = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
        MIGRATE_POSTCOPY      = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
        ...
    )

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

    /* Enable algorithms that ensure a live migration will eventually converge.
     * This usually means the domain will be slowed down to make sure it does
     * not change its memory faster than a hypervisor can transfer the changed
     * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
     * parameters can be used to tune the algorithm.
     *
     * Since: 1.2.3
     */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
     * migration. However, the migration will start normally and
     * virDomainMigrateStartPostCopy needs to be called to switch it into the
     * post-copy mode. See virDomainMigrateStartPostCopy for more details.
     *
     * Since: 1.3.3
     */
    VIR_MIGRATE_POSTCOPY = (1 << 15),

    One post tagged with "load balancer"

    View All Tags

    · 3 min read
    Canwu Yao

    As Harvester v1.2.0 is released, a new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

    With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

    image

    However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs.

    This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

    Prerequisites

    • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

    • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

    Steps to Keep Load Balancer IP

    1. Execute the following script before upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
      • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
      • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
      • <guest-cluster-name>: Name of your guest cluster.
      • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

The script copies the DHCP information to the service annotations and modifies the IP pool allocation history so that the IPs remain unchanged.

      image

      After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

      apiVersion: v1
      kind: Service
      metadata:
        annotations:
          kube-vip.io/hwaddr: 00:00:6c:4f:18:68
          kube-vip.io/requestedIP: 172.19.105.215
        name: lb0
        namespace: default

For load balancer services in pool mode, the IP pool allocation history is updated with the new load balancer name. For example:

      apiVersion: loadbalancer.harvesterhci.io/v1beta1
      kind: IPPool
      metadata:
        name: default
      spec:
        ...
      status:
        allocatedHistory:
          192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replaced with the new load balancer name
2. Add a network selector for the pool.

      For example, the following cluster is under the VM network default/mgmt-untagged. The network selector should be default/mgmt-untagged.

      image

      image

    3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

      image

    4. Execute the script after upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

      image

In this step, the script wraps the operations that upgrade the Harvester cloud provider and set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers are created with the unchanged IPs.
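
  As an optional sanity check (not part of the original script), you could compare the external IPs of the guest cluster's LoadBalancer services before and after the upgrade. The sketch below assumes the same <guest-cluster-kubeconfig-path> placeholder used in the steps above.

      # List LoadBalancer services in the guest cluster and note their EXTERNAL-IP
      # values; run it before and after the upgrade and compare the results.
      kubectl --kubeconfig <guest-cluster-kubeconfig-path> get services -A | grep LoadBalancer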


    4 posts tagged with "longhorn"

    View All Tags

    · 2 min read
    David Ko
    Jillian Maroket

    The Longhorn documentation provides best practice recommendations for deploying Longhorn in production environments. Before configuring workloads, ensure that you have set up the following basic requirements for optimal disk performance.

    • SATA/NVMe SSDs or disk drives with similar performance
    • 10 Gbps network bandwidth between nodes
    • Dedicated Priority Classes for system-managed and user-deployed Longhorn components

    The following sections outline other recommendations for achieving optimal disk performance.

    IO Performance

    • Storage network: Use a dedicated storage network to improve IO performance and stability.

    • Longhorn disk: Use a dedicated disk for Longhorn storage instead of using the root disk.

    • Replica count: Set the default replica count to "2" to achieve data availability with better disk space usage or less impact on system performance. This practice is especially beneficial to data-intensive applications.

    • Storage tag: Use storage tags to define storage tiering for data-intensive applications. For example, only high-performance disks can be used for storing performance-sensitive data. You can either add disks with tags or create StorageClasses with tags, as shown in the example after this list.

    • Data locality: Use best-effort as the default data locality of Longhorn Storage Classes.

      For applications that support data replication (for example, a distributed database), you can use the strict-local option to ensure that only one replica is created for each volume. This practice prevents the extra disk space usage and IO performance overhead associated with volume replication.

      For data-intensive applications, you can use pod scheduling functions such as node selector or taint toleration. These functions allow you to schedule the workload to a specific storage-tagged node together with one replica.
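
To illustrate the storage tag and data locality recommendations above, a Longhorn StorageClass might look like the following sketch. The class name and the ssd tag are assumptions for illustration; adjust them to the tags you actually assign to your disks.

      kind: StorageClass
      apiVersion: storage.k8s.io/v1
      metadata:
        name: longhorn-ssd            # hypothetical class name
      provisioner: driver.longhorn.io
      allowVolumeExpansion: true
      parameters:
        numberOfReplicas: "2"         # matches the replica count recommendation above
        dataLocality: "best-effort"   # default data locality recommendation
        diskSelector: "ssd"           # only schedule replicas on disks tagged "ssd"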

    Space Efficiency

    • Recurring snapshots: Periodically clean up system-generated snapshots and retain only the number of snapshots that makes sense for your implementation.

      For applications with replication capability, periodically delete all types of snapshots.

    Disaster Recovery

    • Recurring backups: Create recurring backup jobs for mission-critical application volumes. A sketch of such a job appears after this list.

    • System backup: Run periodic system backups.
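
As a sketch of the recurring backup recommendation above, a Longhorn RecurringJob could be defined roughly as follows. The job name, schedule, group, and retention values are assumptions; tune them to your own backup policy.

      apiVersion: longhorn.io/v1beta2
      kind: RecurringJob
      metadata:
        name: nightly-backup          # hypothetical job name
        namespace: longhorn-system
      spec:
        cron: "0 2 * * *"             # run every day at 02:00
        task: "backup"
        groups:
          - default                   # applies to volumes in the default group
        retain: 7                     # keep the last 7 backups
        concurrency: 2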

    · 7 min read
    Kiefer Chang

    Harvester v1.2.0 introduces a new enhancement where Longhorn system-managed components in newly-deployed clusters are automatically assigned a system-cluster-critical priority class by default. However, when upgrading your Harvester clusters from previous versions, you may notice that Longhorn system-managed components do not have any priority class set.

This behavior is intentional and aimed at supporting zero-downtime upgrades. Longhorn does not allow changing the priority-class setting when attached volumes exist. For more details, please refer to Setting Priority Class During Longhorn Installation.

    This article explains how to manually configure priority classes for Longhorn system-managed components after upgrading your Harvester cluster, ensuring that your Longhorn components have the appropriate priority class assigned and maintaining the stability and performance of your system.

    Stop all virtual machines

    Stop all virtual machines (VMs) to detach all volumes. Please back up any work before doing this.

1. Log in to a Harvester controller node and become root.

    2. Get all running VMs and write down their namespaces and names:

      kubectl get vmi -A

      Alternatively, you can get this information by backing up the Virtual Machine Instance (VMI) manifests with the following command:

      kubectl get vmi -A -o json > vmi-backup.json
    3. Shut down all VMs. Log in to all running VMs and shut them down gracefully (recommended). Or use the following command to send shutdown signals to all VMs:

      kubectl get vmi -A -o json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
      if [ -z "$name" ]; then
      break
      fi
      echo "Stop ${namespace}/${name}"
      virtctl stop $name -n $namespace
      done
      note

      You can also stop all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Stop.
    4. Ensure there are no running VMs:

      Run the command:

      kubectl get vmi -A

      The above command must return:

      No resources found

    Scale down monitoring pods

    1. Scale down the Prometheus deployment. Run the following command and wait for all Prometheus pods to terminate:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like this:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      statefulset rolling update complete 0 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to terminate:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      statefulset rolling update complete 0 pods at revision alertmanager-rancher-monitoring-alertmanager-c8c459dff...
    3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to terminate:

      kubectl scale --replicas=0 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      deployment "rancher-monitoring-grafana" successfully rolled out

    Scale down vm-import-controller pods

    1. Check if the vm-import-controller addon is enabled and configured with a persistent volume with the following command:

      kubectl get pvc -n harvester-system harvester-vm-import-controller

      If the above command returns an output like this, you must scale down the vm-import-controller pod. Otherwise, you can skip the following step.

      NAME                             STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS         AGE
      harvester-vm-import-controller   Bound    pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559   200Gi      RWO            harvester-longhorn   2m53s
    2. Scale down the vm-import-controller pods with the following command:

      kubectl scale --replicas=0 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      deployment "harvester-vm-import-controller" successfully rolled out

    Set the priority-class setting

    1. Before applying the priority-class setting, you need to verify all volumes are detached. Run the following command to verify the STATE of each volume is detached:

      kubectl get volumes.longhorn.io -A

      Verify the output looks like this:

      NAMESPACE         NAME                                       STATE      ROBUSTNESS   SCHEDULED   SIZE           NODE   AGE
      longhorn-system pvc-5743fd02-17a3-4403-b0d3-0e9b401cceed detached unknown 5368709120 15d
      longhorn-system pvc-7e389fe8-984c-4049-9ba8-5b797cb17278 detached unknown 53687091200 15d
      longhorn-system pvc-8df64e54-ecdb-4d4e-8bab-28d81e316b8b detached unknown 2147483648 15d
      longhorn-system pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559 detached unknown 214748364800 11m
    2. Set the priority-class setting with the following command:

      kubectl patch -n longhorn-system settings.longhorn.io priority-class --patch '{"value": "system-cluster-critical"}' --type merge

      Longhorn system-managed pods will restart and then you need to check if all the system-managed components have a priority class set:

      Get the value of the priority class system-cluster-critical:

      kubectl get priorityclass system-cluster-critical

      Verify the output looks like this:

      NAME                      VALUE        GLOBAL-DEFAULT   AGE
      system-cluster-critical   2000000000   false            15d
    3. Use the following command to get pods' priority in the longhorn-system namespace:

      kubectl get pods -n longhorn-system -o custom-columns="Name":metadata.name,"Priority":.spec.priority
    4. Verify all system-managed components' pods have the correct priority. System-managed components include:

      • csi-attacher
      • csi-provisioner
      • csi-resizer
      • csi-snapshotter
      • engine-image-ei
      • instance-manager-e
      • instance-manager-r
      • longhorn-csi-plugin
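
  As an optional check for step 4 (not part of the original instructions), the following sketch lists any pod in the longhorn-system namespace whose priority class is not system-cluster-critical; an empty result (apart from the header) means the system-managed pods picked up the expected class. Note that user-deployed components may legitimately use a different class.

      # List pods whose priorityClassName is not "system-cluster-critical".
      kubectl get pods -n longhorn-system \
        -o custom-columns="Name":metadata.name,"PriorityClass":.spec.priorityClassName \
        | grep -v system-cluster-critical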

    Scale up vm-import-controller pods

If you scaled down the vm-import-controller pods, you must scale them up again.

    1. Scale up the vm-import-controller pod. Run the command:

      kubectl scale --replicas=1 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      Waiting for deployment "harvester-vm-import-controller" rollout to finish: 0 of 1 updated replicas are available...
      deployment "harvester-vm-import-controller" successfully rolled out
    2. Verify vm-import-controller is running using the following command:

      kubectl get pods --selector app.kubernetes.io/instance=vm-import-controller -A

A sample output looks like this; the pod's STATUS must be Running:

      NAMESPACE          NAME                                              READY   STATUS    RESTARTS   AGE
      harvester-system   harvester-vm-import-controller-6bd8f44f55-m9k86   1/1     Running   0          4m53s

    Scale up monitoring pods

    1. Scale up the Prometheus deployment. Run the following command and wait for all Prometheus pods to roll out:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
2. Scale up the AlertManager deployment. Run the following command and wait for all AlertManager pods to roll out:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision alertmanager-rancher-monitoring-alertmanager-c8bd4466c...
3. Scale up the Grafana deployment. Run the following command and wait for all Grafana pods to roll out:

      kubectl scale --replicas=1 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      Waiting for deployment "rancher-monitoring-grafana" rollout to finish: 0 of 1 updated replicas are available...
      deployment "rancher-monitoring-grafana" successfully rolled out

    Start virtual machines

    1. Start a VM with the command:

      virtctl start $name -n $namespace

      Replace $name with the VM's name and $namespace with the VM's namespace. You can list all virtual machines with the command:

      kubectl get vms -A
      note

You can also start all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Start.

Alternatively, you can start all previously running VMs with the following command, using the VMI manifests you backed up earlier:

      cat vmi-backup.json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
      if [ -z "$name" ]; then
      break
      fi
      echo "Start ${namespace}/${name}"
      virtctl start $name -n $namespace || true
      done

    · 4 min read
    Vicente Cheng

    In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: Analysis: Potential Data/Filesystem Corruption). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM's filesystem and repair it if needed.

    Stop The VM And Backup Volume

Before you scan the filesystem, it is recommended that you back up the volume first. As an example, refer to the following steps to stop the VM and back up the volume.

    • Find the target VM.

    finding the target VM

    • Stop the target VM.

    Stop the target VM

The target VM is stopped and the related volumes are detached. Now go to the Longhorn UI to back up this volume.

    • Enable Developer Tools & Features (Preferences -> Enable Developer Tools & Features).

    Preferences then enable developer mode Enable the developer mode

• Click the button and select Edit Config to open the config page of the VM.

    goto edit config page of VM

    • Go to the Volumes tab and select Check volume details.

    link to longhorn volume page

    • Click the dropdown menu on the right side and select 'Attach' to attach the volume again.

    attach this volume again

    • Select the attached node.

    choose the attached node

    • Check the volume attached under Volume Details and select Take Snapshot on this volume page.

    take snapshot on volume page

    • Confirm that the snapshot is ready.

    check the snapshot is ready

Now that you have completed the volume backup, you need to scan and repair the root filesystem.

    Scanning the root filesystem and repairing

    This section will introduce how to scan the filesystem (e.g., XFS, EXT4) using related tools.

    Before scanning, you need to know the filesystem's device/partition.

    • Identify the filesystem's device by checking the major and minor numbers of that device.
    1. Obtain the major and minor numbers from the listed volume information.

      In the following example, the volume name is pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

      harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al
      brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58

      The output indicates that the major and minor numbers are 8:0.

    2. Obtain the device name from the output of the lsblk command.

      harvester-node-0:~ # lsblk
      NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
      loop0 7:0 0 3G 1 loop /
      sda 8:0 0 40G 0 disk
      ├─sda1 8:1 0 2M 0 part
      ├─sda2 8:2 0 20M 0 part
      └─sda3 8:3 0 40G 0 part

      The output indicates that 8:0 are the major and minor numbers of the device named sda. Therefore, /dev/sda is related to the volume named pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

    • You should now know the filesystem's partition. In the example below, sda3 is the filesystem's partition.
    • Use the Filesystem toolbox image to scan and repair.
    # docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash

Then scan the target device from inside the toolbox container.

    XFS

    When scanning an XFS filesystem, use the xfs_repair command and specify the problematic partition of the device.

    In the following example, /dev/sda3 is the problematic partition.

    # xfs_repair -n /dev/sda3

    To repair the corrupted partition, run the following command.

    # xfs_repair /dev/sda3

    EXT4

When scanning an EXT4 filesystem, use the e2fsck command as follows, where /dev/sde1 is the problematic partition of the device.

    # e2fsck -f /dev/sde1

    To repair the corrupted partition, run the following command.

    # e2fsck -fp /dev/sde1

After running the e2fsck command, you should also see logs related to scanning and repairing the partition. Scanning and repairing the corrupted partition is successful if there are no errors in these logs.

    Detach and Start VM again.

    After the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.

    • Detach the volume from the Longhorn UI.

    detach volume on longhorn UI

    • Start the related VM again from the Harvester UI.

    Start VM again

    Your VM should now work normally.

    · 2 min read
    Kiefer Chang

    Harvester replicates volumes data across disks in a cluster. Before removing a disk, the user needs to evict replicas on the disk to other disks to preserve the volumes' configured availability. For more information about eviction in Longhorn, please check Evicting Replicas on Disabled Disks or Nodes.

    Preparation

    This document describes how to evict Longhorn disks using the kubectl command. Before that, users must ensure the environment is set up correctly. There are two recommended ways to do this:

    1. Log in to any management node and switch to root (sudo -i).
    2. Download the Kubeconfig file and use it locally:
      • Install the kubectl and yq programs manually.
      • Open the Harvester GUI, click Support at the bottom left of the page, and click Download KubeConfig to download the Kubeconfig file.
      • Set the Kubeconfig file's path in the KUBECONFIG environment variable. For example, export KUBECONFIG=/path/to/kubeconfig.

    Evicting replicas from a disk

    1. List Longhorn nodes (names are identical to Kubernetes nodes):

      kubectl get -n longhorn-system nodes.longhorn.io

      Sample output:

      NAME    READY   ALLOWSCHEDULING   SCHEDULABLE   AGE
      node1   True    true              True          24d
      node2   True    true              True          24d
      node3   True    true              True          24d
    2. List disks on a node. Assume we want to evict replicas of a disk on node1:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.spec.disks'

      Sample output:

      default-disk-ed7af10f5b8356be:
        allowScheduling: true
        evictionRequested: false
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    3. Assume the disk default-disk-ed7af10f5b8356be is the one from which we want to evict replicas.

      Edit the node:

      kubectl edit -n longhorn-system nodes.longhorn.io node1 

Update these two fields and save (alternatively, see the kubectl patch sketch after these steps):

      • spec.disks.<disk_name>.allowScheduling to false
      • spec.disks.<disk_name>.evictionRequested to true

      Sample editing:

      default-disk-ed7af10f5b8356be:
        allowScheduling: false
        evictionRequested: true
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    4. Wait for all replicas on the disk to be evicted.

      Get current scheduled replicas on the disk:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica'

      Sample output:

      pvc-86d3d212-d674-4c64-b69b-4a2eb1df2272-r-7b422db7: 5368709120
      pvc-b06f0b09-f30c-4936-8a2a-425b993dd6cb-r-bb0fa6b3: 2147483648
      pvc-b844bcc6-3b06-4367-a136-3909251cb560-r-08d1ab3c: 53687091200
      pvc-ea6e0dff-f446-4a38-916a-b3bea522f51c-r-193ca5c6: 10737418240

      Run the command repeatedly, and the output should eventually become an empty map:

      {}

This means Longhorn has evicted all replicas on the disk to other disks.

      note

      If a replica always stays in a disk, please open the Longhorn GUI and check if there is free space on other disks.
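
As an alternative to interactively editing the node in step 3, a single kubectl patch along the following lines might work. This is only a sketch under the same assumptions as the example above (node node1, disk default-disk-ed7af10f5b8356be); adjust the names to your environment.

      # Disable scheduling and request eviction for one disk in a single merge patch.
      kubectl patch -n longhorn-system nodes.longhorn.io node1 --type merge \
        -p '{"spec":{"disks":{"default-disk-ed7af10f5b8356be":{"allowScheduling":false,"evictionRequested":true}}}}'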


    2 posts tagged with "network"

    View All Tags

    · 2 min read
    Date Huang

    NIC Naming Scheme changed after upgrading to v1.0.1

systemd in openSUSE Leap 15.3, which is the base OS of Harvester, was upgraded to 246.16-150300.7.39.1. This version enables the additional naming scheme sle15-sp3, which is v238 with bridge_no_slot. When a PCI bridge is associated with a NIC, systemd no longer generates ID_NET_NAME_SLOT, and the naming policy in /usr/lib/systemd/network/99-default.link falls back to ID_NET_NAME_PATH. Because of this change, NIC names might change on your Harvester nodes during the upgrade from v1.0.0 to v1.0.1-rc1 or above, which can cause network issues associated with NIC names.

Affected Settings and Workaround

    Startup Network Configuration

When NIC names change, you need to update the names in /oem/99_custom.yaml. You can use the migration script to change the NIC names that are associated with a PCI bridge.

    tip

You can use an identical machine to test the naming changes before applying the configuration to production machines.

You can execute the script as root on v1.0.0 via:

    # python3 udev_v238_sle15-sp3.py

It outputs the patched configuration to the screen, and you can compare it to the original one to make sure there are no problems (e.g., you can use vimdiff to check the configuration).

# python3 udev_v238_sle15-sp3.py > /oem/test
    # vimdiff /oem/test /oem/99_custom.yaml

After checking the result, you can execute the script with --really-want-to-do to overwrite the configuration. The script also backs up the original configuration file with a timestamp before patching it.

    # python3 udev_v238_sle15-sp3.py --really-want-to-do

    Harvester VLAN Network Configuration

If your VLAN network is associated with a NIC name directly, without bonding, you also need to migrate the ClusterNetwork and NodeNetwork resources in addition to the steps in the previous section.

    note

If your VLAN network is associated with the bonding name in /oem/99_custom.yaml, you can skip this section.

    Modify ClusterNetworks

    You need to modify ClusterNetworks via

    $ kubectl edit clusternetworks vlan

Search for this pattern:

    config:
      defaultPhysicalNIC: <Your NIC name>

and change it to the new NIC name.

    Modify NodeNetworks

    You need to modify NodeNetworks via

    $ kubectl edit nodenetworks <Node name>-vlan

Search for this pattern:

    spec:
      nic: <Your NIC name>

and change it to the new NIC name.

    · 4 min read
    Date Huang

    What is the default behavior of a VM with multiple NICs

In some scenarios, you'll set up two or more NICs in your VM to serve different networking purposes. If all networks are configured with DHCP by default, you might get random connectivity issues. And while they might seem fixed after rebooting the VM, the VM will still lose its connection randomly after some time.

How to identify connectivity issues

    In a Linux VM, you can use commands from the iproute2 package to identify the default route.

    In your VM, execute the following command:

    ip route show default
    tip

If you get an access denied error, run the command using sudo.

    The output of this command will only show the default route with the gateway and VM IP of the primary network interface (eth0 in the example below).

    default via <Gateway IP> dev eth0 proto dhcp src <VM IP> metric 100

    Here is the full example:

    $ ip route show default
    default via 192.168.0.254 dev eth0 proto dhcp src 192.168.0.100 metric 100

    However, if the issue covered in this KB occurs, you'll only be able to connect to the VM via the VNC or serial console.

Once connected, you can run the same command as before:

    $ ip route show default

    However, this time you'll get a default route with an incorrect gateway IP. For example:

    default via <Incorrect Gateway IP> dev eth0 proto dhcp src <VM's IP> metric 100

    Why do connectivity issues occur randomly

In a standard setup, cloud-based VMs typically use DHCP to configure their NICs. DHCP sets an IP and a gateway for each NIC. Lastly, a default route to the gateway IP is also added, so you can use that IP to connect to the VM.

However, Linux distributions start multiple DHCP clients at the same time and do not have a priority system. This means that if you have two or more NICs configured with DHCP, the clients enter a race condition when configuring the default route, and depending on the running distribution's DHCP scripts, there is no guarantee which default route ends up being configured.

Because the default route might change on every DHCP renewal or OS reboot, this creates network connectivity issues.

    How to avoid the random connectivity issues

    You can easily avoid these connectivity issues by having only one NIC attached to the VM and having only one IP and one gateway configured.

    However, for VMs in more complex infrastructures, it is often not possible to use just one NIC. For example, if your infrastructure has a storage network and a service network. For security reasons, the storage network will be isolated from the service network and have a separate subnet. In this case, you must have two NICs to connect to both the service and storage networks.

    You can choose a solution below that meets your requirements and security policy.

    Disable DHCP on secondary NIC

As mentioned above, the problem is caused by a race condition between two DHCP clients. One solution is to disable DHCP on all NICs and configure them with static IPs only. Alternatively, you can configure only the secondary NIC with a static IP and keep the primary NIC on DHCP.

    1. To configure the primary NIC with a static IP (eth0 in this example), you can edit the file /etc/sysconfig/network/ifcfg-eth0 with the following values:
    BOOTPROTO='static'
    IPADDR='192.168.0.100'
    NETMASK='255.255.255.0'

    Alternatively, if you want to reserve the primary NIC using DHCP (eth0 in this example), use the following values instead:

    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
2. Configure the default route by editing the file /etc/sysconfig/network/ifroute-eth0 (if you configured the primary NIC using DHCP, skip this step):
    # Destination  Dummy/Gateway  Netmask  Interface
    default 192.168.0.254 - eth0
    warning

Do not add another default route for your secondary NIC.

3. Finally, configure a static IP for the secondary NIC by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='static'
    IPADDR='10.0.0.100'
    NETMASK='255.255.255.0'

    Cloud-Init config

    network:
      version: 1
      config:
        - type: physical
          name: eth0
          subnets:
            - type: dhcp
        - type: physical
          name: eth1
          subnets:
            - type: static
              address: 10.0.0.100/24

    Disable secondary NIC default route from DHCP

If your secondary NIC must get its IP from DHCP, you need to disable the default route configuration for the secondary NIC.

    1. Confirm that the primary NIC configures its default route in the file /etc/sysconfig/network/ifcfg-eth0:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
2. Disable the secondary NIC default route configuration by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='no'

    Cloud-Init config

This solution is not available with Cloud-Init, because Cloud-Init does not expose an option to control the DHCP default route behavior.


    One post tagged with "policy"

    View All Tags

    · 11 min read
    Jian Wang

In Harvester, VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

The VM Live Migration process finishes smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

A large part of the following content is copied from the KubeVirt document https://kubevirt.io/user-guide/operations/live_migration/; some content and formatting are adjusted to fit this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select > Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

After you select Apply, a VirtualMachineInstanceMigration CRD object is created, and the related controller/operator starts the process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachineInstanceMigration
    metadata:
      name: migration-job
    spec:
      vmiName: new-vm
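
For illustration, you could save the manifest above (for example as migration-job.yaml, an assumed filename), apply it in the VMI's namespace, and watch the migration object's progress:

    # Apply the VirtualMachineInstanceMigration manifest in the VMI's namespace
    # and watch its status until the migration completes or fails.
    kubectl apply -f migration-job.yaml -n <vmi-namespace>
    kubectl get virtualmachineinstancemigrations.kubevirt.io migration-job -n <vmi-namespace> -w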

Under the hood, the open source projects KubeVirt, libvirt, QEMU, and others perform most of the VM Live Migration work; see the References section.

    Migration Status Reporting

When a virtual machine instance (VMI) starts, it is also calculated whether the machine is live migratable. The result is stored in VMI.status.conditions. The calculation can be based on multiple parameters of the VMI; however, at the moment, it is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.

The reported Migration Method is also calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

    Status:
      Conditions:
        Status: True
        Type: LiveMigratable
      Migration Method: BlockMigration
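
To check this from the command line, you could inspect the VMI's status directly; the sketch below assumes a VMI named new-vm in the default namespace and uses the yq tool mentioned elsewhere in this knowledge base.

    # Print the VMI's conditions (including LiveMigratable) and the chosen migration method.
    kubectl get vmi new-vm -n default -o yaml | yq e '.status.conditions'
    kubectl get vmi new-vm -n default -o yaml | yq e '.status.migrationMethod'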

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

    Migration State:
      Completed: true
      End Timestamp: 2019-03-29T03:37:52Z
      Migration Config:
        Completion Timeout Per GiB: 800
        Progress Timeout: 150
      Migration UID: c64d4898-51d3-11e9-b370-525500d15501
      Source Node: node02
      Start Timestamp: 2019-03-29T04:02:47Z
      Target Direct Migration Node Ports:
        35001: 0
        41068: 49152
        38284: 49153
      Target Node: node01
      Target Node Address: 10.128.0.46
      Target Node Domain Detected: true
      Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If that is not the case, however, the migration might get stuck and be unable to progress.

The main factor that affects migrations from the guest perspective is its dirty rate, which is the rate at which the VM dirties memory. Guests with a high dirty rate lead to a race during migration: memory is transferred continuously to the target while the same memory is dirtied again by the guest. In such scenarios, consider using a more advanced migration strategy. Refer to Understanding different migration strategies for more details.

    There are 3 VM Live Migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

It works as follows:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
3. The guest starts executing on the target VM.
4. The source VM is removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

However, in some cases migrations might not converge easily; that is, by the time a chunk of source VM state is received by the target VM, it has already been mutated on the source VM (which is where the guest is still executing). There are many reasons for a migration to fail to converge, such as a high dirty rate or scarce resources like network bandwidth and CPU. In such scenarios, consider the following alternative strategies.

    VM Live Migration Strategy: Post-copy

Post-copy migrations work as follows:

1. The target VM is created.
2. The guest starts running on the target VM.
3. The source starts sending chunks of VM state (mostly memory) to the target.
4. When the guest, running on the target VM, accesses memory:
   1. If the memory already exists on the target VM, the guest can access it directly.
   2. Otherwise, the target VM requests the missing chunk of memory from the source VM.
5. Once all of the memory state is updated at the target VM, the source VM is removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

• The same memory chunk is never transferred twice. With post-copy it does not matter that a page has been dirtied, because the guest is already running on the target VM.
    • This means that a high dirty-rate has much less effect.
    • Consumes less network bandwidth.

    Disadvantages:

• When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest's state, but other parts of it may still be updated only on the source VM. This situation is generally dangerous because, for example, if either the source or the target VM crashes, the state cannot be recovered.
• Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to fetch a large amount of memory from the source in a short period of time.
• Slower than pre-copy in most cases.
• Harder to cancel a migration.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

Since a high dirty rate is usually the most significant reason that migrations fail to converge, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or only negligibly. If it does not, the CPU is throttled more and more as time passes.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process, non-locked pages and blocks are copied and become free for the instance to use again. For a migration to succeed, the instance is assumed to write to the free pages and blocks (pollute the pages) at a lower rate than they are copied.

    Completion Time

In some cases the virtual machine can write to different memory pages or disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, the live migration will be aborted if it runs for a long period of time. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data for the migration to complete before aborting it. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.

    Progress Timeout

    A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, only 5 migrations run in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64 MiB/s.

    You can change these values in the kubevirt CR:

    apiVersion: kubevirt.io/v1
    kind: KubeVirt
    metadata:
      name: kubevirt
      namespace: kubevirt
    spec:
      configuration:
        migrations:
          parallelMigrationsPerCluster: 5
          parallelOutboundMigrationsPerNode: 2
          bandwidthPerMigration: 64Mi
          completionTimeoutPerGiB: 800
          progressTimeout: 150
          disableTLS: false
          nodeDrainTaintKey: "kubevirt.io/drain"
          allowAutoConverge: false        # related to: Auto-converge
          allowPostCopy: false            # related to: Post-copy
          unsafeMigrationOverride: false

Remember that most of these configurations can be overridden and fine-tuned for a specific group of VMs. For more information, please refer to the Migration Policies section below.

    Migration Policies

Migration policies provide a new way of applying migration configurations to virtual machines. A policy can refine the Kubevirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that the migration policy can refine (i.e., change, remove, or add to).

    Remember that migration policies are in version v1alpha1. This means that this API is not fully stable yet and that APIs may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      allowAutoConverge: true
      bandwidthPerMigration: 217Ki
      completionTimeoutPerGiB: 23
      allowPostCopy: false

    All the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR's MigrationConfiguration. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configurations.

    Matching Policies to VMs

    Next in the spec are the selectors defining the group of VMs to apply the policy. The options to do so are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        namespaceSelector:
          hpc-workloads: true # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db # Matches a key and a value

    References

    Documents

    Libvirt Guest Migration

Libvirt has a chapter describing the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

    The VM Live Migration related configuration options are passed to each layer correspondingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

    ...
    import "libvirt.org/go/libvirt"

    ...

    func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
        ...
        if options.AllowAutoConverge {
            migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
        }
        if options.AllowPostCopy {
            migrateFlags |= libvirt.MIGRATE_POSTCOPY
        }
        ...
    }

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

    const (
        ...
        MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
        MIGRATE_RDMA_PIN_ALL  = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
        MIGRATE_POSTCOPY      = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
        ...
    )

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

    /* Enable algorithms that ensure a live migration will eventually converge.
     * This usually means the domain will be slowed down to make sure it does
     * not change its memory faster than a hypervisor can transfer the changed
     * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
     * parameters can be used to tune the algorithm.
     *
     * Since: 1.2.3
     */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
     * migration. However, the migration will start normally and
     * virDomainMigrateStartPostCopy needs to be called to switch it into the
     * post-copy mode. See virDomainMigrateStartPostCopy for more details.
     *
     * Since: 1.3.3
     */
    VIR_MIGRATE_POSTCOPY = (1 << 15),
    diff --git a/kb/tags/priority-class/index.html b/kb/tags/priority-class/index.html

    One post tagged with "priority class"

    View All Tags

    · 7 min read
    Kiefer Chang

    Harvester v1.2.0 introduces a new enhancement where Longhorn system-managed components in newly-deployed clusters are automatically assigned a system-cluster-critical priority class by default. However, when upgrading your Harvester clusters from previous versions, you may notice that Longhorn system-managed components do not have any priority class set.

    This behavior is intentional and aimed at supporting zero-downtime upgrades. Longhorn does not allow changing the priority-class setting when attached volumes exist. For more details, please refer to Setting Priority Class During Longhorn Installation.

    This article explains how to manually configure priority classes for Longhorn system-managed components after upgrading your Harvester cluster, ensuring that your Longhorn components have the appropriate priority class assigned and maintaining the stability and performance of your system.

    Stop all virtual machines

    Stop all virtual machines (VMs) to detach all volumes. Please back up any work before doing this.

    1. Log in to a Harvester controller node and become root.

    2. Get all running VMs and write down their namespaces and names:

      kubectl get vmi -A

      Alternatively, you can get this information by backing up the Virtual Machine Instance (VMI) manifests with the following command:

      kubectl get vmi -A -o json > vmi-backup.json
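      To confirm that the backup file captured every VM (a quick check that is not part of the original article):

      # List the namespace/name of every VMI recorded in the backup
      jq -r '.items[] | .metadata.namespace + "/" + .metadata.name' vmi-backup.json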
    3. Shut down all VMs. Log in to each running VM and shut it down gracefully (recommended), or use the following command to send a shutdown signal to all VMs:

      kubectl get vmi -A -o json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Stop ${namespace}/${name}"
        virtctl stop $name -n $namespace
      done
      note

      You can also stop all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Stop.
    4. Ensure there are no running VMs:

      Run the command:

      kubectl get vmi -A

      The above command must return:

      No resources found

    Scale down monitoring pods

    1. Scale down the Prometheus deployment. Run the following command and wait for all Prometheus pods to terminate:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like this:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      statefulset rolling update complete 0 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale down the AlertManager deployment. Run the following command and wait for all AlertManager pods to terminate:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 0}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      statefulset rolling update complete 0 pods at revision alertmanager-rancher-monitoring-alertmanager-c8c459dff...
    3. Scale down the Grafana deployment. Run the following command and wait for all Grafana pods to terminate:

      kubectl scale --replicas=0 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      deployment "rancher-monitoring-grafana" successfully rolled out

    Scale down vm-import-controller pods

    1. Check if the vm-import-controller addon is enabled and configured with a persistent volume with the following command:

      kubectl get pvc -n harvester-system harvester-vm-import-controller

      If the above command returns an output like this, you must scale down the vm-import-controller pod. Otherwise, you can skip the following step.

      NAME                             STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS         AGE
      harvester-vm-import-controller   Bound    pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559   200Gi      RWO            harvester-longhorn   2m53s
    2. Scale down the vm-import-controller pods with the following command:

      kubectl scale --replicas=0 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      deployment "harvester-vm-import-controller" successfully rolled out

    Set the priority-class setting

    1. Before applying the priority-class setting, you need to verify all volumes are detached. Run the following command to verify the STATE of each volume is detached:

      kubectl get volumes.longhorn.io -A

      Verify the output looks like this:

      NAMESPACE         NAME                                       STATE      ROBUSTNESS   SCHEDULED   SIZE           NODE   AGE
      longhorn-system   pvc-5743fd02-17a3-4403-b0d3-0e9b401cceed   detached   unknown                  5368709120            15d
      longhorn-system   pvc-7e389fe8-984c-4049-9ba8-5b797cb17278   detached   unknown                  53687091200           15d
      longhorn-system   pvc-8df64e54-ecdb-4d4e-8bab-28d81e316b8b   detached   unknown                  2147483648            15d
      longhorn-system   pvc-eb23e838-4c64-4650-bd8f-ba7075ab0559   detached   unknown                  214748364800          11m
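      If you prefer a scriptable check, the following prints only volumes whose STATE is not detached; empty output means it is safe to proceed (a convenience sketch, not part of the original article):

      kubectl get volumes.longhorn.io -A --no-headers | awk '$3 != "detached"'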
    2. Set the priority-class setting with the following command:

      kubectl patch -n longhorn-system settings.longhorn.io priority-class --patch '{"value": "system-cluster-critical"}' --type merge

      Longhorn system-managed pods will restart and then you need to check if all the system-managed components have a priority class set:

      Get the value of the priority class system-cluster-critical:

      kubectl get priorityclass system-cluster-critical

      Verify the output looks like this:

      NAME                      VALUE        GLOBAL-DEFAULT   AGE
      system-cluster-critical   2000000000   false            15d
    3. Use the following command to get pods' priority in the longhorn-system namespace:

      kubectl get pods -n longhorn-system -o custom-columns="Name":metadata.name,"Priority":.spec.priority
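      If you prefer to see the priority class name rather than the numeric value, a variant of the command above (not part of the original article) is:

      kubectl get pods -n longhorn-system -o custom-columns=NAME:.metadata.name,PRIORITY_CLASS:.spec.priorityClassName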
    4. Verify all system-managed components' pods have the correct priority. System-managed components include:

      • csi-attacher
      • csi-provisioner
      • csi-resizer
      • csi-snapshotter
      • engine-image-ei
      • instance-manager-e
      • instance-manager-r
      • longhorn-csi-plugin

    Scale up vm-import-controller pods

    If you scaled down the vm-import-controller pods, you must scale them up again.

    1. Scale up the vm-import-controller pod. Run the command:

      kubectl scale --replicas=1 deployment/harvester-vm-import-controller -n harvester-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n harvester-system deployment/harvester-vm-import-controller

      A sample output looks like this:

      deployment.apps/harvester-vm-import-controller scaled
      Waiting for deployment "harvester-vm-import-controller" rollout to finish: 0 of 1 updated replicas are available...
      deployment "harvester-vm-import-controller" successfully rolled out
    2. Verify vm-import-controller is running using the following command:

      kubectl get pods --selector app.kubernetes.io/instance=vm-import-controller -A

      A sample output looks like this; the pod's STATUS must be Running:

      NAMESPACE          NAME                                              READY   STATUS    RESTARTS   AGE
      harvester-system   harvester-vm-import-controller-6bd8f44f55-m9k86   1/1     Running   0          4m53s

    Scale up monitoring pods

    1. Scale up the Prometheus deployment. Run the following command and wait for all Prometheus pods to roll out:

      kubectl patch -n cattle-monitoring-system prometheus/rancher-monitoring-prometheus --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/prometheus-rancher-monitoring-prometheus

      A sample output looks like:

      prometheus.monitoring.coreos.com/rancher-monitoring-prometheus patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision prometheus-rancher-monitoring-prometheus-cbf6bd5f7...
    2. Scale up the AlertManager deployment. Run the following command and wait for all AlertManager pods to roll out:

      kubectl patch -n cattle-monitoring-system alertmanager/rancher-monitoring-alertmanager --patch '{"spec": {"replicas": 1}}' --type merge && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system statefulset/alertmanager-rancher-monitoring-alertmanager

      A sample output looks like this:

      alertmanager.monitoring.coreos.com/rancher-monitoring-alertmanager patched
      Waiting for 1 pods to be ready...
      statefulset rolling update complete 1 pods at revision alertmanager-rancher-monitoring-alertmanager-c8bd4466c...
    3. Scale up the Grafana deployment. Run the following command and wait for all Grafana pods to roll out:

      kubectl scale --replicas=1 deployment/rancher-monitoring-grafana -n cattle-monitoring-system && \
      sleep 5 && \
      kubectl rollout status --watch=true -n cattle-monitoring-system deployment/rancher-monitoring-grafana

      A sample output looks like this:

      deployment.apps/rancher-monitoring-grafana scaled
      Waiting for deployment "rancher-monitoring-grafana" rollout to finish: 0 of 1 updated replicas are available...
      deployment "rancher-monitoring-grafana" successfully rolled out

    Start virtual machines

    1. Start a VM with the command:

      virtctl start $name -n $namespace

      Replace $name with the VM's name and $namespace with the VM's namespace. You can list all virtual machines with the command:

      kubectl get vms -A
      note

      You can also start all VMs from the Harvester UI:

      1. Go to the Virtual Machines page.
      2. For each VM, select > Start.

      Alternatively, you can start all previously running VMs recorded in vmi-backup.json with the following command:

      cat vmi-backup.json | jq -r '.items[] | [.metadata.name, .metadata.namespace] | @tsv' | while IFS=$'\t' read -r name namespace; do
        if [ -z "$name" ]; then
          break
        fi
        echo "Start ${namespace}/${name}"
        virtctl start $name -n $namespace || true
      done
    diff --git a/kb/tags/reserved-resource/index.html b/kb/tags/reserved-resource/index.html

    One post tagged with "reserved resource"

    View All Tags

    · 3 min read
    Jian Wang

    Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

    # kubectl get nodes -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: v1
      kind: Node
      metadata:
        ..
        management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
        management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
        node.alpha.kubernetes.io/ttl: "0"
        ..
        name: harv41
        resourceVersion: "2170215"
        uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
      spec:
        podCIDR: 10.52.0.0/24
        podCIDRs:
        - 10.52.0.0/24
        providerID: rke2://harv41
      status:
        addresses:
        - address: 192.168.122.141
          type: InternalIP
        - address: harv41
          type: Hostname
        allocatable:
          cpu: "10"
          devices.kubevirt.io/kvm: 1k
          devices.kubevirt.io/tun: 1k
          devices.kubevirt.io/vhost-net: 1k
          ephemeral-storage: "149527126718"
          hugepages-1Gi: "0"
          hugepages-2Mi: "0"
          memory: 20464216Ki
          pods: "200"
        capacity:
          cpu: "10"
          devices.kubevirt.io/kvm: 1k
          devices.kubevirt.io/tun: 1k
          devices.kubevirt.io/vhost-net: 1k
          ephemeral-storage: 153707984Ki
          hugepages-1Gi: "0"
          hugepages-2Mi: "0"
          memory: 20464216Ki
          pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

    # kubectl get NodeMetrics -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: metrics.k8s.io/v1beta1
      kind: NodeMetrics
      metadata:
        ...
        name: harv41
      timestamp: "2024-01-23T12:04:44Z"
      usage:
        cpu: 891736742n
        memory: 9845008Ki
      window: 10.149s
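    As a quick cross-check (not part of the original article), the same usage figures can also be viewed in human-readable form, provided the metrics API is available:

      kubectl top node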

    Resource Reservation

    Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the NodeMetrics object.

    management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

    For more information, see Requests and Limits in the Kubernetes documentation.

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

    note

    Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources.

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

    apiVersion: v1
    items:
    - apiVersion: longhorn.io/v1beta2
      kind: Node
      metadata:
        ..
        name: harv41
        namespace: longhorn-system
        ..
      spec:
        allowScheduling: true
        disks:
          default-disk-ef11a18c36b01132:
            allowScheduling: true
            diskType: filesystem
            evictionRequested: false
            path: /var/lib/harvester/defaultdisk
            storageReserved: 24220101427
            tags: []
        ..
      status:
        ..
        diskStatus:
          default-disk-ef11a18c36b01132:
            ..
            diskType: filesystem
            diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
            scheduledReplica:
              pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
              pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
              pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
              pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
              pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
            storageAvailable: 77699481600
            storageMaximum: 80733671424
            storageScheduled: 45097156608
        region: ""
        snapshotCheckStatus: {}
        zone: ""
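    As a worked illustration (derived from the sample node above, which has a single disk; not part of the original article), the host-level storage metrics would be:

      # Total capacity = sum(storageMaximum)           = 80733671424 bytes (~75.2 GiB)
      # Reserved       = sum(storageReserved)          = 24220101427 bytes (~22.6 GiB)
      # Used           = Total - sum(storageAvailable)
      echo $(( 80733671424 - 77699481600 ))            # 3034189824 bytes (~2.8 GiB)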
    diff --git a/kb/tags/resource-metrics/index.html b/kb/tags/resource-metrics/index.html

    One post tagged with "resource metrics"

    View All Tags

    · 3 min read
    Jian Wang

    Harvester calculates the resource metrics using data that is dynamically collected from the system. Host-level resource metrics are calculated and then aggregated to obtain the cluster-level metrics.

    You can view resource-related metrics on the Harvester UI.

    • Hosts screen: Displays host-level metrics

      host level resources metrics

    • Dashboard screen: Displays cluster-level metrics

      cluster level resources metrics

    CPU and Memory

    The following sections describe the data sources and calculation methods for CPU and memory resources.

    • Resource capacity: Baseline data
    • Resource usage: Data source for the Used field on the Hosts screen
    • Resource reservation: Data source for the Reserved field on the Hosts screen

    Resource Capacity

    In Kubernetes, a Node object is created for each host.

    The .status.allocatable.cpu and .status.allocatable.memory represent the available CPU and Memory resources of a host.

    # kubectl get nodes -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: v1
      kind: Node
      metadata:
        ..
        management.cattle.io/pod-limits: '{"cpu":"12715m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","memory":"17104951040"}'
        management.cattle.io/pod-requests: '{"cpu":"5657m","devices.kubevirt.io/kvm":"1","devices.kubevirt.io/tun":"1","devices.kubevirt.io/vhost-net":"1","ephemeral-storage":"50M","memory":"9155862208","pods":"78"}'
        node.alpha.kubernetes.io/ttl: "0"
        ..
        name: harv41
        resourceVersion: "2170215"
        uid: b6f5850a-2fbc-4aef-8fbe-121dfb671b67
      spec:
        podCIDR: 10.52.0.0/24
        podCIDRs:
        - 10.52.0.0/24
        providerID: rke2://harv41
      status:
        addresses:
        - address: 192.168.122.141
          type: InternalIP
        - address: harv41
          type: Hostname
        allocatable:
          cpu: "10"
          devices.kubevirt.io/kvm: 1k
          devices.kubevirt.io/tun: 1k
          devices.kubevirt.io/vhost-net: 1k
          ephemeral-storage: "149527126718"
          hugepages-1Gi: "0"
          hugepages-2Mi: "0"
          memory: 20464216Ki
          pods: "200"
        capacity:
          cpu: "10"
          devices.kubevirt.io/kvm: 1k
          devices.kubevirt.io/tun: 1k
          devices.kubevirt.io/vhost-net: 1k
          ephemeral-storage: 153707984Ki
          hugepages-1Gi: "0"
          hugepages-2Mi: "0"
          memory: 20464216Ki
          pods: "200"

    Resource Usage

    CPU and memory usage data is continuously collected and stored in the NodeMetrics object. Harvester reads the data from usage.cpu and usage.memory.

    # kubectl get NodeMetrics -A -oyaml
    apiVersion: v1
    items:
    - apiVersion: metrics.k8s.io/v1beta1
      kind: NodeMetrics
      metadata:
        ...
        name: harv41
      timestamp: "2024-01-23T12:04:44Z"
      usage:
        cpu: 891736742n
        memory: 9845008Ki
      window: 10.149s

    Resource Reservation

    Harvester dynamically calculates the resource limits and requests of all pods running on a host, and updates the information to the annotations of the NodeMetrics object.

    management.cattle.io/pod-limits: '{"cpu":"12715m",...,"memory":"17104951040"}'
    management.cattle.io/pod-requests: '{"cpu":"5657m",...,"memory":"9155862208"}'

    For more information, see Requests and Limits in the Kubernetes documentation.

    Storage

    Longhorn is the default Container Storage Interface (CSI) driver of Harvester, providing storage management features such as distributed block storage and tiering.

    Reserved Storage in Longhorn

    Longhorn allows you to specify the percentage of disk space that is not allocated to the default disk on each new Longhorn node. The default value is "30". For more information, see Storage Reserved Percentage For Default Disk in the Longhorn documentation.

    Depending on the disk size, you can modify the default value using the embedded Longhorn UI.

    note

    Before changing the settings, read the Longhorn documentation carefully.

    Data Sources and Calculation

    Harvester uses the following data to calculate metrics for storage resources.

    • Sum of the storageMaximum values of all disks (status.diskStatus.disk-name): Total storage capacity

    • Total storage capacity - Sum of the storageAvailable values of all disks (status.diskStatus.disk-name): Data source for the Used field on the Hosts screen

    • Sum of the storageReserved values of all disks (spec.disks): Data source for the Reserved field on the Hosts screen

    # kubectl get nodes.longhorn.io -n longhorn-system -oyaml

    apiVersion: v1
    items:
    - apiVersion: longhorn.io/v1beta2
      kind: Node
      metadata:
        ..
        name: harv41
        namespace: longhorn-system
        ..
      spec:
        allowScheduling: true
        disks:
          default-disk-ef11a18c36b01132:
            allowScheduling: true
            diskType: filesystem
            evictionRequested: false
            path: /var/lib/harvester/defaultdisk
            storageReserved: 24220101427
            tags: []
        ..
      status:
        ..
        diskStatus:
          default-disk-ef11a18c36b01132:
            ..
            diskType: filesystem
            diskUUID: d2788933-8817-44c6-b688-dee414cc1f73
            scheduledReplica:
              pvc-95561210-c39c-4c2e-ac9a-4a9bd72b3100-r-20affeca: 2147483648
              pvc-9e83b2dc-6a4b-4499-ba70-70dc25b2d9aa-r-4ad05c86: 32212254720
              pvc-bc25be1e-ca4e-4818-a16d-48353a0f2f96-r-c7b88c60: 3221225472
              pvc-d9d3e54d-8d67-4740-861e-6373f670f1e4-r-f4c7c338: 2147483648
              pvc-e954b5fe-bbd7-4d44-9866-6ff6684d5708-r-ba6b87b6: 5368709120
            storageAvailable: 77699481600
            storageMaximum: 80733671424
            storageScheduled: 45097156608
        region: ""
        snapshotCheckStatus: {}
        zone: ""
    diff --git a/kb/tags/rook/index.html b/kb/tags/rook/index.html

    One post tagged with "rook"

    View All Tags

    · 4 min read
    Hang Yu

    Harvester v1.2.0 introduces the capability to install a Container Storage Interface (CSI) driver in your Harvester cluster. This allows you to leverage external storage for the virtual machine's non-system data disks, giving you the flexibility to use different drivers tailored for specific needs, whether it's for performance optimization or seamless integration with your existing in-house storage solutions.

    It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

    One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

    Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

    Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

    In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

    Install Harvester Cluster

    Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistentStatePaths section in the Harvester configuration. These paths include:

    os:
      persistent_state_paths:
        - /var/lib/rook
        - /var/lib/ceph
      modules:
        - rbd
        - nbd

    After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.

    Install Rook to Harvester

    Install Rook to the Harvester cluster by referring to Rook Quickstart.

    curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
    && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
    # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
    kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
    kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m

    Using an existing external Ceph cluster

    1. Run the Python script create-external-cluster-resources.py in the existing external Ceph cluster to create all users and keys.
    # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
    curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
    python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash
    2. Copy the Bash output.

    Example output:

    export NAMESPACE=rook-ceph-external
    export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
    export ROOK_EXTERNAL_USERNAME=client.healthchecker
    export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
    export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
    export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
    export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
    export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
    export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
    export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
    export MONITORING_ENDPOINT=192.168.5.99
    export MONITORING_ENDPOINT_PORT=9283
    export RBD_POOL_NAME=test
    export RGW_POOL_PREFIX=default
    3. Consume the external Ceph cluster resources on the Harvester cluster.
    # Paste the above output from create-external-cluster-resources.py into import-env.sh
    vim import-env.sh
    source import-env.sh
    # this script will create a StorageClass ceph-rbd
    source import-external-cluster.sh
    kubectl apply -f common-external.yaml
    kubectl apply -f cluster-external.yaml
    # wait for all pods to become Ready
    watch 'kubectl --namespace rook-ceph get pods'
    4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
    cat >./csi/rbd/snapshotclass-external.yaml <<EOF
    ---
    apiVersion: snapshot.storage.k8s.io/v1
    kind: VolumeSnapshotClass
    metadata:
      name: csi-rbdplugin-snapclass-external
    driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
    parameters:
      clusterID: rook-ceph-external # namespace:cluster
      csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
      csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
    deletionPolicy: Delete
    EOF

    kubectl apply -f ./csi/rbd/snapshotclass-external.yaml
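    To confirm the class was created (a quick check, not part of the original article):

      kubectl get volumesnapshotclass csi-rbdplugin-snapclass-external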

    Configure Harvester Cluster

    Before you can make use of Harvester's Backup & Snapshot features, you need to set up some essential configurations through the Harvester csi-driver-config setting. To set up these configurations, follow these steps:

    1. Login to the Harvester UI, then navigate to Advanced > Settings.
    2. Find and select csi-driver-config, and then click > Edit Setting to access the configuration options.
    3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
    4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
    5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.

    csi-driver-config-external

    Use Rook Ceph in Harvester

    After successfully configuring these settings, you can proceed to utilize the Rook Ceph StorageClass, which is named rook-ceph-block for the internal Ceph cluster or named ceph-rbd for the external Ceph cluster. You can apply this StorageClass when creating an empty volume or adding a new block volume to a VM, enhancing your Harvester cluster's storage capabilities.

    With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.
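    A minimal sketch of exercising the external StorageClass from the command line (the PVC name and namespace below are illustrative; block mode is used because Harvester attaches VM data disks as block devices):

      cat <<EOF | kubectl apply -f -
      apiVersion: v1
      kind: PersistentVolumeClaim
      metadata:
        name: test-ceph-rbd-disk      # illustrative name
        namespace: default
      spec:
        accessModes:
          - ReadWriteOnce
        volumeMode: Block
        storageClassName: ceph-rbd    # StorageClass created by import-external-cluster.sh
        resources:
          requests:
            storage: 1Gi
      EOF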

    rook-ceph-volume-external

    rook-ceph-vm-external

    diff --git a/kb/tags/root/index.html b/kb/tags/root/index.html

    One post tagged with "root"

    View All Tags

    · 4 min read
    Vicente Cheng

    In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: Analysis: Potential Data/Filesystem Corruption). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM's filesystem and repair it if needed.

    Stop The VM And Backup Volume

    Before you scan the filesystem, it is recommended that you back up the volume first. As an example, refer to the following steps to stop the VM and back up the volume.

    • Find the target VM.

    finding the target VM

    • Stop the target VM.

    Stop the target VM

    The target VM is stopped and the related volumes are detached. Now go to the Longhorn UI to back up this volume.

    • Enable Developer Tools & Features (Preferences -> Enable Developer Tools & Features).

    Preferences then enable developer mode Enable the developer mode

    • Click the button and select Edit Config to edit the config page of the VM.

    goto edit config page of VM

    • Go to the Volumes tab and select Check volume details.

    link to longhorn volume page

    • Click the dropdown menu on the right side and select 'Attach' to attach the volume again.

    attach this volume again

    • Select the attached node.

    choose the attached node

    • Check the volume attached under Volume Details and select Take Snapshot on this volume page.

    take snapshot on volume page

    • Confirm that the snapshot is ready.

    check the snapshot is ready

    Now that you have completed the volume backup, you can scan the root filesystem and repair it if needed.

    Scanning the root filesystem and repairing

    This section describes how to scan and repair a filesystem (e.g., XFS, EXT4) using the related tools.

    Before scanning, you need to know the filesystem's device/partition.

    • Identify the filesystem's device by checking the major and minor numbers of that device.
    1. Obtain the major and minor numbers from the listed volume information.

      In the following example, the volume name is pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

      harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al
      brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58

      The output indicates that the major and minor numbers are 8:0.

    2. Obtain the device name from the output of the lsblk command.

      harvester-node-0:~ # lsblk
      NAME     MAJ:MIN   RM   SIZE   RO   TYPE   MOUNTPOINTS
      loop0      7:0      0     3G    1   loop   /
      sda        8:0      0    40G    0   disk
      ├─sda1     8:1      0     2M    0   part
      ├─sda2     8:2      0    20M    0   part
      └─sda3     8:3      0    40G    0   part

      The output indicates that 8:0 are the major and minor numbers of the device named sda. Therefore, /dev/sda is related to the volume named pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.
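      If you have several attached volumes, the following pair of commands (a convenience sketch, not part of the original article) makes the mapping easier to read:

      ls -l /dev/longhorn/                  # major:minor numbers of every attached Longhorn volume
      lsblk -o NAME,MAJ:MIN,SIZE,TYPE       # the same major:minor numbers next to each block device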

    • You should now know the filesystem's partition. In the example below, sda3 is the filesystem's partition.
    • Use the Filesystem toolbox image to scan and repair.
    # docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash

    Then scan the target device as described in the following sections.

    XFS

    When scanning an XFS filesystem, use the xfs_repair command and specify the problematic partition of the device.

    In the following example, /dev/sda3 is the problematic partition.

    # xfs_repair -n /dev/sda3

    To repair the corrupted partition, run the following command.

    # xfs_repair /dev/sda3

    EXT4

    When scanning an EXT4 filesystem, use the e2fsck command as follows, where /dev/sde1 is the problematic partition of the device.

    # e2fsck -f /dev/sde1

    To repair the corrupted partition, run the following command.

    # e2fsck -fp /dev/sde1

    After running the e2fsck command, you should also see logs related to scanning and repairing the partition. Scanning and repairing the corrupted partition is successful if these logs contain no errors.

    Detach the Volume and Start the VM Again

    After the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.

    • Detach the volume from the Longhorn UI.

    detach volume on longhorn UI

    • Start the related VM again from the Harvester UI.

    Start VM again

    Your VM should now work normally.

    diff --git a/kb/tags/scheduling/index.html b/kb/tags/scheduling/index.html

    One post tagged with "scheduling"

    View All Tags

    · 16 min read
    PoAn Yang

    How does Harvester schedule a VM?

    Harvester doesn't directly schedule a VM in Kubernetes; it relies on KubeVirt to create the custom resource VirtualMachine. When the request to create a new VM is sent, a VirtualMachineInstance object is created, which in turn creates the corresponding Pod.

    The whole VM creation process leverages kube-scheduler, which allows Harvester to use nodeSelector, affinity, and resource requests/limits to influence where a VM will be deployed.

    How does kube-scheduler decide where to deploy a VM?

    First, kube-scheduler finds Nodes available to run a pod. After that, kube-scheduler scores each available Node by a list of plugins like ImageLocality, InterPodAffinity, NodeAffinity, etc.

    Finally, kube-scheduler aggregates the scores from the plugins' results for each Node and selects the Node with the highest score to deploy the Pod.

    For example, let's say we have a three-node Harvester cluster with 6 CPU cores and 16 GB of RAM each, and we want to deploy a VM with 1 CPU and 1 GB of RAM (without resource overcommit).

    kube-scheduler will summarize the scores, as displayed in Table 1 below, and will select the node with the highest score, harvester-node-2 in this case, to deploy the VM.

    kube-scheduler logs
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2"

    Table 1 - kube-scheduler scores example

                                       harvester-node-0   harvester-node-1   harvester-node-2
    ImageLocality                      54                 54                 54
    InterPodAffinity                   0                  0                  0
    NodeResourcesLeastAllocated        4                  34                 37
    NodeAffinity                       0                  0                  0
    NodePreferAvoidPods                1000000            1000000            1000000
    PodTopologySpread                  200                200                200
    TaintToleration                    100                100                100
    NodeResourcesBalancedAllocation    0                  45                 46
    Total                              1000358            1000433            1000437

    Why are VMs distributed unevenly with overcommit?

    With resources overcommit, Harvester modifies the resources request. By default, the overcommit configuration is {"cpu": 1600, "memory": 150, "storage": 200}. This means that if we request a VM with 1 CPU and 1G RAM, its resources.requests.cpu will become 62m.

    note

    The unit suffix m stands for "thousandth of a core."

    To explain this, let's take the case of CPU overcommit. The default value of 1 CPU is equal to 1000m CPU, and with the default overcommit configuration of "cpu": 1600, the CPU request becomes 16 times smaller. Here is the calculation: 1000m * 100 / 1600 = 62m.
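    To double-check the arithmetic and see what kube-scheduler actually receives (a sketch that is not part of the original article; the pod name is taken from the logs below, and the main container of a virt-launcher pod is typically named compute):

      echo $(( 1000 * 100 / 1600 ))    # 62 millicores for a 1-CPU VM under the default CPU overcommit of 1600
      kubectl get pod virt-launcher-vm1-with-overcommit-ljlmq -o jsonpath='{.spec.containers[?(@.name=="compute")].resources.requests}'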

    Now, we can see how overcommitting influences kube-scheduler scores.

    In this example, we use a three-node Harvester cluster with 6 CPU cores and 16 GB of RAM each. We will deploy two VMs with 1 CPU and 1 GB of RAM, and we will compare the scores for both the "with-overcommit" and "without-overcommit" cases.

    The results in Table 2 and Table 3 can be explained as follows:

    In the "with-overcommit" case, both VMs are deployed on harvester-node-2, however in the "without-overcommit" case, the VM1 is deployed on harvester-node-2, and VM2 is deployed on harvester-node-1.

    If we look at the detailed scores, we'll see the Total Score for harvester-node-2 vary from 1000459 to 1000461 in the "with-overcommit" case, and from 1000437 to 1000382 in the "without-overcommit" case. This is because resource overcommit influences request-cpu and request-memory.

    In the "with-overcommit" case, the request-cpu changes from 4412m to 4474m. The difference between the two numbers is 62m, which is what we calculated above. However, in the "without-overcommit" case, we send real requests to kube-scheduler, so the request-cpu changes from 5350m to 6350m.

    Finally, since most plugins give the same scores for each node except NodeResourcesBalancedAllocation and NodeResourcesLeastAllocated, the difference comes down to these two scores on each node.

    From the results, we can see that the overcommit feature influences the final score of each node, so VMs are distributed unevenly. Although the harvester-node-2 score for VM 2 is higher than for VM 1, it does not always increase. In Table 4, we keep deploying VMs with 1 CPU and 1G RAM, and we can see that the score of harvester-node-2 starts decreasing from the 11th VM. The behavior of kube-scheduler depends on your cluster resources and the workload you deploy.

    kube-scheduler logs for vm1-with-overcommit
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 59,

    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 46,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=59

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2" score=1000459

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2"
    kube-scheduler logs for vm2-with-overcommit
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 64,

    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 43,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=64

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=43

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2" score=1000461

    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2"
    kube-scheduler logs for vm1-without-overcommit
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2"
    kube-scheduler logs for vm2-without-overcommit
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 0,

    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 28,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=28

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-2" score=1000382

    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1"
    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1"

    Table 2 - With Overcommit

    VM 1 / VM 2                            harvester-node-0           harvester-node-1         harvester-node-2
    request-cpu (m)                        9022 / 9022                4622 / 4622              4412 / 4474
    request-memory                         14807289856 / 14807289856  5992960000 / 5992960000  5581918208 / 6476701696
    NodeResourcesBalancedAllocation Score  0 / 0                      58 / 58                  59 / 64
    NodeResourcesLeastAllocated Score      5 / 5                      43 / 43                  46 / 43
    Other Scores                           1000354 / 1000354          1000354 / 1000354        1000354 / 1000354
    Total Score                            1000359 / 1000359          1000455 / 1000455        1000459 / 1000461

    Table 3 - Without Overcommit

    VM 1 / VM 2                            harvester-node-0           harvester-node-1         harvester-node-2
    request-cpu (m)                        9960 / 9960                5560 / 5560              5350 / 6350
    request-memory                         15166603264 / 15166603264  6352273408 / 6352273408  5941231616 / 7195328512
    NodeResourcesBalancedAllocation Score  0 / 0                      45 / 45                  46 / 0
    NodeResourcesLeastAllocated Score      4 / 4                      34 / 34                  37 / 28
    Other Scores                           1000354 / 1000354          1000354 / 1000354        1000354 / 1000354
    Total Score                            1000358 / 1000358          1000433 / 1000433        1000437 / 1000382

    Table 4

    Score   harvester-node-0   harvester-node-1   harvester-node-2
    VM 1    1000359            1000455            1000459
    VM 2    1000359            1000455            1000461
    VM 3    1000359            1000455            1000462
    VM 4    1000359            1000455            1000462
    VM 5    1000359            1000455            1000463
    VM 6    1000359            1000455            1000465
    VM 7    1000359            1000455            1000466
    VM 8    1000359            1000455            1000467
    VM 9    1000359            1000455            1000469
    VM 10   1000359            1000455            1000469
    VM 11   1000359            1000455            1000465
    VM 12   1000359            1000455            1000457

    How to avoid uneven distribution of VMs?

    There are many plugins in kube-scheduler which we can use to influence the scores. For example, we can add the podAntiAffinity plugin to avoid VMs with the same labels being deployed on the same node.

    affinity:
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
        - podAffinityTerm:
            labelSelector:
              matchExpressions:
              - key: harvesterhci.io/creator
                operator: Exists
            topologyKey: kubernetes.io/hostname
          weight: 100
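    After applying such a rule, you can check how the VM pods were actually spread across nodes; a minimal sketch, assuming the VMs run in the default namespace and using the standard kubevirt.io=virt-launcher pod label:

    # List the virt-launcher pods together with the node each one landed on.
    kubectl get pods -n default -l kubevirt.io=virt-launcher -o wide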

    How to see scores in kube-scheduler?

    kube-scheduler is deployed as a static pod in Harvester. The manifest is located at /var/lib/rancher/rke2/agent/pod-manifests/kube-scheduler.yaml on each management node. We can add - --v=10 to the kube-scheduler container arguments to show score logs.

    apiVersion: v1
    kind: Pod
    metadata:
      labels:
        component: kube-scheduler
        tier: control-plane
      name: kube-scheduler
      namespace: kube-system
    spec:
      containers:
      - command:
        - kube-scheduler
        # ...
        - --v=10
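    Once the scheduler restarts with the higher verbosity, the score lines shown earlier can be filtered out of its logs; a minimal sketch, assuming the component=kube-scheduler label from the manifest above:

    # Show per-plugin and final scores from the kube-scheduler logs.
    kubectl -n kube-system logs -l component=kube-scheduler --tail=-1 \
      | grep -E "Plugin scored node for pod|final score"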

    2 posts tagged with "storage"


    · 4 min read
    Vicente Cheng

    In earlier versions of Harvester (v1.0.3 and prior), Longhorn volumes may get corrupted during the replica rebuilding process (reference: Analysis: Potential Data/Filesystem Corruption). In Harvester v1.1.0 and later versions, the Longhorn team has fixed this issue. This article covers manual steps you can take to scan the VM's filesystem and repair it if needed.

    Stop The VM And Backup Volume

    Before you scan the filesystem, it is recommended that you back up the volume first. As an example, the following steps show how to stop the VM and back up the volume.

    • Find the target VM.

    finding the target VM

    • Stop the target VM.

    Stop the target VM

    The target VM is stopped and the related volumes are detached. Now go to the Longhorn UI to back up this volume.

    • Enable Developer Tools & Features (Preferences -> Enable Developer Tools & Features).

    Preferences then enable developer mode Enable the developer mode

    • Click the button and select Edit Config to edit the config page of the VM.

    goto edit config page of VM

    • Go to the Volumes tab and select Check volume details.

    link to longhorn volume page

    • Click the dropdown menu on the right side and select 'Attach' to attach the volume again.

    attach this volume again

    • Select the attached node.

    choose the attached node

    • Check the volume attached under Volume Details and select Take Snapshot on this volume page.

    take snapshot on volume page

    • Confirm that the snapshot is ready.

    check the snapshot is ready

    Now that you have completed the volume backup, you can scan and repair the root filesystem.

    Scanning and repairing the root filesystem

    This section describes how to scan and repair the filesystem (e.g., XFS, EXT4) using the related tools.

    Before scanning, you need to know the filesystem's device/partition.

    • Identify the filesystem's device by checking the major and minor numbers of that device.
    1. Obtain the major and minor numbers from the listed volume information.

      In the following example, the volume name is pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

      harvester-node-0:~ # ls /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58 -al
      brw-rw---- 1 root root 8, 0 Oct 23 14:43 /dev/longhorn/pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58

      The output indicates that the major and minor numbers are 8:0.

    2. Obtain the device name from the output of the lsblk command.

      harvester-node-0:~ # lsblk
      NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
      loop0 7:0 0 3G 1 loop /
      sda 8:0 0 40G 0 disk
      ├─sda1 8:1 0 2M 0 part
      ├─sda2 8:2 0 20M 0 part
      └─sda3 8:3 0 40G 0 part

      The output indicates that 8:0 are the major and minor numbers of the device named sda. Therefore, /dev/sda is related to the volume named pvc-ea7536c0-301f-479e-b2a2-e40ddc864b58.

    • You should now know the filesystem's partition. In the example below, sda3 is the filesystem's partition.
    • Use the Filesystem toolbox image to scan and repair.
    # docker run -it --rm --privileged registry.opensuse.org/isv/rancher/harvester/toolbox/main/fs-toolbox:latest -- bash

    Then, from inside the toolbox container, scan the target device.
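    Before running the repair tools, you can optionally double-check the device mapping from inside the toolbox; a minimal sketch using the 8:0 / sda3 example above (lsblk and blkid are assumed to be available in the toolbox image):

    # Find the device whose MAJ:MIN pair matches the Longhorn volume (8:0 in the example).
    lsblk -o NAME,MAJ:MIN,SIZE,TYPE | awk '$2 == "8:0"'

    # Confirm the filesystem type of the partition you are about to scan.
    blkid /dev/sda3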

    XFS

    When scanning an XFS filesystem, use the xfs_repair command and specify the problematic partition of the device.

    In the following example, /dev/sda3 is the problematic partition.

    # xfs_repair -n /dev/sda3

    To repair the corrupted partition, run the following command.

    # xfs_repair /dev/sda3

    EXT4

    When scanning an EXT4 filesystem, use the e2fsck command as follows, where /dev/sde1 is the problematic partition of the device.

    # e2fsck -f /dev/sde1

    To repair the corrupted partition, run the following command.

    # e2fsck -fp /dev/sde1

    After using the 'e2fsck' command, you should also see logs related to scanning and repairing the partition. Scanning and repairing the corrupted partition is successful if there are no errors in these logs.

    Detach the Volume and Start the VM Again

    After the corrupted partition is scanned and repaired, detach the volume and try to start the related VM again.

    • Detach the volume from the Longhorn UI.

    detach volume on longhorn UI

    • Start the related VM again from the Harvester UI.

    Start VM again

    Your VM should now work normally.

    · 2 min read
    Kiefer Chang

    Harvester replicates volumes data across disks in a cluster. Before removing a disk, the user needs to evict replicas on the disk to other disks to preserve the volumes' configured availability. For more information about eviction in Longhorn, please check Evicting Replicas on Disabled Disks or Nodes.

    Preparation

    This document describes how to evict Longhorn disks using the kubectl command. Before that, users must ensure the environment is set up correctly. There are two recommended ways to do this:

    1. Log in to any management node and switch to root (sudo -i).
    2. Download the Kubeconfig file and use it locally:
      • Install the kubectl and yq programs manually.
      • Open the Harvester GUI, click Support at the bottom left of the page, and click Download KubeConfig to download the Kubeconfig file.
      • Set the KUBECONFIG environment variable to the Kubeconfig file's path. For example, export KUBECONFIG=/path/to/kubeconfig.

    Evicting replicas from a disk

    1. List Longhorn nodes (names are identical to Kubernetes nodes):

      kubectl get -n longhorn-system nodes.longhorn.io

      Sample output:

      NAME    READY   ALLOWSCHEDULING   SCHEDULABLE   AGE
      node1   True    true              True          24d
      node2   True    true              True          24d
      node3   True    true              True          24d
    2. List disks on a node. Assume we want to evict replicas of a disk on node1:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.spec.disks'

      Sample output:

      default-disk-ed7af10f5b8356be:
        allowScheduling: true
        evictionRequested: false
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    3. Assume disk default-disk-ed7af10f5b8356be is the target we want to evict replicas out of.

      Edit the node:

      kubectl edit -n longhorn-system nodes.longhorn.io node1 

      Update these two fields and save:

      • spec.disks.<disk_name>.allowScheduling to false
      • spec.disks.<disk_name>.evictionRequested to true

      Sample editing:

      default-disk-ed7af10f5b8356be:
        allowScheduling: false
        evictionRequested: true
        path: /var/lib/harvester/defaultdisk
        storageReserved: 36900254515
        tags: []
    4. Wait for all replicas on the disk to be evicted.

      Get current scheduled replicas on the disk:

      kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml | yq e '.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica'

      Sample output:

      pvc-86d3d212-d674-4c64-b69b-4a2eb1df2272-r-7b422db7: 5368709120
      pvc-b06f0b09-f30c-4936-8a2a-425b993dd6cb-r-bb0fa6b3: 2147483648
      pvc-b844bcc6-3b06-4367-a136-3909251cb560-r-08d1ab3c: 53687091200
      pvc-ea6e0dff-f446-4a38-916a-b3bea522f51c-r-193ca5c6: 10737418240

      Run the command repeatedly (or script it; see the sketch after this list), and the output should eventually become an empty map:

      {}

      This means Longhorn has evicted all replicas on the disk to other disks.

      note

      If a replica always stays on the disk, open the Longhorn GUI and check whether there is free space on other disks.
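    For reference, the edit in step 3 and the polling in step 4 can also be scripted; a minimal sketch, assuming node1 and the disk name from the example above:

    # Request eviction from the disk without an interactive editor (same fields as step 3).
    kubectl patch -n longhorn-system nodes.longhorn.io node1 --type merge \
      -p '{"spec":{"disks":{"default-disk-ed7af10f5b8356be":{"allowScheduling":false,"evictionRequested":true}}}}'

    # Poll until the scheduled-replica map on the disk becomes empty (step 4).
    until [ "$(kubectl get -n longhorn-system nodes.longhorn.io node1 -o yaml \
      | yq e '.status.diskStatus.default-disk-ed7af10f5b8356be.scheduledReplica')" = "{}" ]; do
      sleep 10
    done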


    One post tagged with "strategy"


    · 11 min read
    Jian Wang

    In Harvester, the VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

    In most cases, the VM Live Migration process finishes smoothly. However, the migration may sometimes get stuck and not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

    A large part of the following content is copied from the KubeVirt document https://kubevirt.io/user-guide/operations/live_migration/; some content and formatting has been adjusted to fit this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select > Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

    After successfully selecting Apply, a CRD VirtualMachineInstanceMigration object is created, and the related controller/operator will start the process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachineInstanceMigration
    metadata:
      name: migration-job
    spec:
      vmiName: new-vm
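    For example, the object above can be created and watched with kubectl; a minimal sketch, assuming the YAML is saved as migration-job.yaml and the VMI new-vm lives in the default namespace:

    # Create the migration object for the VMI named new-vm.
    kubectl apply -n default -f migration-job.yaml

    # Watch the migration object until it reports completion or failure.
    kubectl get -n default virtualmachineinstancemigrations migration-job -o yaml -w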

    Under the hood, the open source projects KubeVirt, Libvirt, QEMU, and others perform most of the VM Live Migration work (see the References section below).

    Migration Status Reporting

    When a virtual machine instance (VMI) is started, it is also calculated whether the machine is live migratable. The result is stored in VMI.status.conditions. The calculation can be based on multiple parameters of the VMI; however, at the moment, it is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.

    The reported Migration Method is also calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

    Status:
      Conditions:
        Status: True
        Type: LiveMigratable
      Migration Method: BlockMigration

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

    Migration State:
      Completed: true
      End Timestamp: 2019-03-29T03:37:52Z
      Migration Config:
        Completion Timeout Per GiB: 800
        Progress Timeout: 150
      Migration UID: c64d4898-51d3-11e9-b370-525500d15501
      Source Node: node02
      Start Timestamp: 2019-03-29T04:02:47Z
      Target Direct Migration Node Ports:
        35001: 0
        41068: 49152
        38284: 49153
      Target Node: node01
      Target Node Address: 10.128.0.46
      Target Node Domain Detected: true
      Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq
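    The same block can be read from the live VMI object with kubectl and yq; a minimal sketch, assuming a VMI named new-vm in the default namespace:

    # Print the migration state from the VMI status.
    kubectl get -n default vmi new-vm -o yaml | yq e '.status.migrationState'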

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

    VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the case, however, the migration might get stuck without the ability to progress.

    The main factor that affects migrations from the guest perspective is its dirty rate, which is the rate at which the VM dirties memory. Guests with a high dirty rate create a race during migration: on the one hand, memory is transferred continuously to the target, and on the other, the same memory is dirtied again by the guest. In such scenarios, consider using one of the more advanced migration strategies. Refer to Understanding different migration strategies for more details.

    There are 3 VM Live Migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

    It works as follows:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
    3. The guest starts executing on the target VM.
    4. The source VM is removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

    However, in some cases migrations might not converge easily: by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (which is the VM the guest executes on). There are many reasons why migrations fail to converge, such as a high dirty rate or low resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.

    VM Live Migration Strategy: Post-copy

    Post-copy migrations work as follows:

    1. The target VM is created.
    2. The guest is being run on the target VM.
    3. The source starts sending chunks of VM state (mostly memory) to the target.
    4. When the guest, running on the target VM, accesses memory:
       1. If the memory exists on the target VM, the guest can access it.
       2. Otherwise, the target VM asks for a chunk of memory from the source VM.
    5. Once all of the memory state is updated at the target VM, the source VM is removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

    • The same memory chunk is never transferred twice. This is possible because with post-copy it doesn't matter that a page has been dirtied, since the guest is already running on the target VM.
    • This means that a high dirty-rate has much less effect.
    • Consumes less network bandwidth.

    Disadvantages:

    • When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, that memory is one part of the guest's state, but other parts of it may still be updated only at the source VM. This situation is generally dangerous because, for example, if either the source or the target VM crashes, the state cannot be recovered.
    • Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to wait for a lot of memory to arrive in a short period of time.
    • Slower than pre-copy in most cases.
    • Harder to cancel a migration.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

    Since a high dirty rate is usually the most significant reason migrations fail to converge, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or is throttled only negligibly. But if the migration does not converge fast enough, the CPU is throttled more and more as time goes on.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

    Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process non-locked pages and blocks are being copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than these are being copied.

    Completion Time

    In some cases the virtual machine can write to different memory pages / disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, live migration will be aborted if it has been running for a long period of time. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data for the migration to complete before aborting it. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.

    Progress Timeout

    A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

    KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, only 5 migrations run in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.

    You can change these values in the kubevirt CR:

    apiVersion: kubevirt.io/v1
    kind: KubeVirt
    metadata:
      name: kubevirt
      namespace: kubevirt
    spec:
      configuration:
        migrations:
          parallelMigrationsPerCluster: 5
          parallelOutboundMigrationsPerNode: 2
          bandwidthPerMigration: 64Mi
          completionTimeoutPerGiB: 800
          progressTimeout: 150
          disableTLS: false
          nodeDrainTaintKey: "kubevirt.io/drain"
          allowAutoConverge: false      # related to: Auto-converge
          allowPostCopy: false          # related to: Post-copy
          unsafeMigrationOverride: false
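    To change these values on a Harvester cluster, edit the KubeVirt CR itself. A minimal sketch; in a default Harvester installation the object is typically named kubevirt in the harvester-system namespace, but verify with the first command:

    # Locate the KubeVirt CR (name and namespace can differ between installations).
    kubectl get kubevirt -A

    # Edit spec.configuration.migrations as shown above.
    kubectl edit kubevirt kubevirt -n harvester-system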

    Remember that most of these configurations can be overridden and fine-tuned to a specified group of VMs. For more information, please refer to the Migration Policies section below.

    Migration Policies

    Migration policies provide a new way of applying migration configurations to Virtual Machines. The policies can refine the Kubevirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that the migration policy can refine (i.e., change, remove, or add).

    Remember that migration policies are in version v1alpha1. This means that this API is not fully stable yet and that APIs may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      allowAutoConverge: true
      bandwidthPerMigration: 217Ki
      completionTimeoutPerGiB: 23
      allowPostCopy: false

    All the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR's MigrationConfiguration. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configurations.

    Matching Policies to VMs

    Next in the spec are the selectors defining the group of VMs to apply the policy. The options to do so are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        namespaceSelector:
          hpc-workloads: true # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db # Matches a key and a value
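    To try a policy end to end, label a VMI so the selector above matches it, then apply a complete policy manifest; a minimal sketch in which the VMI name new-vm, the label value, and the file name migration-policy.yaml are all illustrative:

    # Label a running VMI so the virtualMachineInstanceSelector above matches it.
    kubectl label vmi new-vm workload-type=db

    # Apply the policy (saved, with a metadata.name added, as migration-policy.yaml).
    kubectl apply -f migration-policy.yaml

    # MigrationPolicy is cluster-scoped; list policies to confirm it was created.
    kubectl get migrationpolicies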

    References

    Documents

    Libvirt Guest Migration

    Libvirt has a chapter describing the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

    The VM Live Migration related configuration options are passed to each layer correspondingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

    ...
    import "libvirt.org/go/libvirt"

    ...

    func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
        ...
        if options.AllowAutoConverge {
            migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
        }
        if options.AllowPostCopy {
            migrateFlags |= libvirt.MIGRATE_POSTCOPY
        }
        ...
    }

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

    const (
        ...
        MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
        MIGRATE_RDMA_PIN_ALL  = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
        MIGRATE_POSTCOPY      = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
        ...
    )

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

    /* Enable algorithms that ensure a live migration will eventually converge.
     * This usually means the domain will be slowed down to make sure it does
     * not change its memory faster than a hypervisor can transfer the changed
     * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
     * parameters can be used to tune the algorithm.
     *
     * Since: 1.2.3
     */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
     * migration. However, the migration will start normally and
     * virDomainMigrateStartPostCopy needs to be called to switch it into the
     * post-copy mode. See virDomainMigrateStartPostCopy for more details.
     *
     * Since: 1.3.3
     */
    VIR_MIGRATE_POSTCOPY = (1 << 15),

    One post tagged with "upgrade"


    · 3 min read
    Canwu Yao

    As Harvester v1.2.0 is released, a new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

    With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

    image

    However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, as they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. Users can manually upgrade the Harvester cloud provider using Helm as a workaround and provide the correct cluster name after upgrading. However, this would result in a change in the load balancer IPs.

    This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

    Prerequisites

    • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

    • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

    Steps to Keep Load Balancer IP

    1. Execute the following script before upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
      • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
      • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
      • <guest-cluster-name>: Name of your guest cluster.
      • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

      The script copies the DHCP information to the service annotations and modifies the IP pool allocation history so that the IPs remain unchanged.

      image

      After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

      apiVersion: v1
      kind: Service
      metadata:
        annotations:
          kube-vip.io/hwaddr: 00:00:6c:4f:18:68
          kube-vip.io/requestedIP: 172.19.105.215
        name: lb0
        namespace: default

      As for load balancer services in pool mode, the IP pool allocation history will be updated with the new load balancer name. For example:

      apiVersion: loadbalancer.harvesterhci.io/v1beta1
      kind: IPPool
      metadata:
        name: default
      spec:
        ...
      status:
        allocatedHistory:
          192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replace the new load balancer name
    2. Add a network selector for the pool.

      For example, the following cluster is under the VM network default/mgmt-untagged. The network selector should be default/mgmt-untagged.

      image

      image

    3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

      image

    4. Execute the script after upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

      image

      In this step, the script wraps the operations that upgrade the Harvester cloud provider and set the cluster name. After the Harvester cloud provider is running, the new Harvester load balancers are created with the IPs unchanged.
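      After this step, you can confirm from the guest cluster that the load balancer IPs are unchanged; a minimal sketch using the guest cluster kubeconfig downloaded earlier:

      # List LoadBalancer services and their external IPs in the guest cluster.
      KUBECONFIG=<guest-cluster-kubeconfig-path> kubectl get svc -A | grep LoadBalancer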

    - + \ No newline at end of file diff --git a/kb/tags/virtual-machine/index.html b/kb/tags/virtual-machine/index.html index c631fc38..eb6b861e 100644 --- a/kb/tags/virtual-machine/index.html +++ b/kb/tags/virtual-machine/index.html @@ -9,13 +9,13 @@ One post tagged with "virtual machine" | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    One post tagged with "virtual machine"

    View All Tags

    · 11 min read
    Jian Wang

    In Harvester, the VM Live Migration is well supported by the UI. Please refer to Harvester VM Live Migration for more details.

    The VM Live Migration process is finished smoothly in most cases. However, sometimes the migration may get stuck and not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

    A big part of the following contents are copied from kubevirt document https://kubevirt.io/user-guide/operations/live_migration/, some contents/formats are adjusted to fit in this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select > Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

    After successfully selecting Apply, a CRD VirtualMachineInstanceMigration object is created, and the related controller/operator will start the process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachineInstanceMigration
    metadata:
    name: migration-job
    spec:
    vmiName: new-vm

    Under the hood, the open source projects Kubevirt, Libvirt, QEMU, ... perform most of the VM Live Migration. References.

    Migration Status Reporting

    When starting a virtual machine instance (VMI), it has also been calculated whether the machine is live migratable. The result is being stored in the VMI VMI.status.conditions. The calculation can be based on multiple parameters of the VMI, however, at the moment, the calculation is largely based on the Access Mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-LiveMigratable VMI will be rejected.

    The reported Migration Method is also being calculated during VMI start. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

Status:
  Conditions:
    Status:          True
    Type:            LiveMigratable
  Migration Method:  BlockMigration
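
A quick way to inspect these fields on a live cluster is via kubectl with JSONPath (a sketch; new-vm is the example VMI used earlier, and the field paths mirror the status shown above, so they may vary across KubeVirt versions):

# Is the VMI live migratable?
kubectl get vmi new-vm -o jsonpath='{.status.conditions[?(@.type=="LiveMigratable")].status}'
# Which migration method was calculated for it?
kubectl get vmi new-vm -o jsonpath='{.status.migrationMethod}'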

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

Migration State:
  Completed:         true
  End Timestamp:     2019-03-29T03:37:52Z
  Migration Config:
    Completion Timeout Per GiB:  800
    Progress Timeout:            150
  Migration UID:     c64d4898-51d3-11e9-b370-525500d15501
  Source Node:       node02
  Start Timestamp:   2019-03-29T04:02:47Z
  Target Direct Migration Node Ports:
    35001:  0
    41068:  49152
    38284:  49153
  Target Node:                  node01
  Target Node Address:          10.128.0.46
  Target Node Domain Detected:  true
  Target Pod:                   virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq
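
Output like the above can be read directly from the VMI while a migration is running or after it finishes (a sketch; new-vm is the example name, and the field path assumes the status layout shown above):

# Human-readable view of the migration state.
kubectl describe vmi new-vm | grep -A 20 "Migration State"
# Raw status field, useful for scripting.
kubectl get vmi new-vm -o jsonpath='{.status.migrationState}'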

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

VM Live Migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, migrations should converge nicely. If this is not the case, however, the migration might get stuck and be unable to progress.

The main factor that affects migrations from the guest perspective is its dirty rate, which is the rate at which the VM dirties memory. Guests with a high dirty rate lead to a race during migration: on the one hand, memory is transferred continuously to the target, and on the other, the same memory keeps getting dirtied by the guest. In such scenarios, consider using more advanced migration strategies. Refer to Understanding different migration strategies for more details.

    There are 3 VM Live Migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

It works as follows:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
3. The guest starts executing on the target VM.
4. The source VM is removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

However, in some cases migrations might not converge easily; that is, by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (which is the VM the guest executes on). There are many reasons for migrations to fail to converge, such as a high dirty rate or scarce resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.

    VM Live Migration Strategy: Post-copy

Post-copy migrations work as follows:

    1. The target VM is created.
2. The guest starts running on the target VM.
3. The source starts sending chunks of VM state (mostly memory) to the target.
4. When the guest, running on the target VM, accesses memory:
   1. If the memory exists on the target VM, the guest can access it.
   2. Otherwise, the target VM asks the source VM for a chunk of memory.
5. Once all of the memory state is updated at the target VM, the source VM is removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

    • The same memory chunk is never transferred twice. This is possible because, with post-copy, it doesn't matter that a page has been dirtied, since the guest is already running on the target VM.
    • This means that a high dirty-rate has much less effect.
    • Consumes less network bandwidth.

    Disadvantages:

    • When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, this memory is one part of the guest's state, but some other parts of it may still be updated only at the source VM. This situation is generally dangerous, since, for example, if either the target or guest VM crashes, the state cannot be recovered.
    • Slow warmup: when the guest starts executing, no memory is present at the target VM. Therefore, the guest has to wait for a lot of memory to be fetched in a short period of time.
    • Slower than pre-copy in most cases.
    • A migration is harder to cancel.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

Since a high dirty rate is usually the most significant factor preventing migrations from converging, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or is throttled only negligibly. But if the migration does not converge fast enough, the CPU is throttled more and more as time goes on.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

Depending on the type, the live migration process will copy virtual machine memory pages and disk blocks to the destination. During this process, non-locked pages and blocks are copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance writes to the free pages and blocks (pollutes the pages) at a lower rate than they are being copied.

    Completion Time

In some cases the virtual machine can write to different memory pages / disk blocks at a higher rate than these can be copied, which will prevent the migration process from completing in a reasonable amount of time. In this case, live migration will be aborted if it has been running for a long period of time. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data for the migration to complete before aborting it. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.
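
The timeout arithmetic from the paragraph above, as a quick sketch:

# completionTimeoutPerGiB defaults to 800 seconds per GiB of data to copy.
completion_timeout_per_gib=800
vmi_size_gib=8                  # memory plus ephemeral disks that must be copied
echo $(( completion_timeout_per_gib * vmi_size_gib ))   # prints 6400 (seconds)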

    Progress Timeout

    A VM Live Migration will also be aborted when it notices that copying memory doesn't make any progress. The time to wait for live migration to make progress in transferring data is configurable by the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

KubeVirt puts some limits in place so that migrations don't overwhelm the cluster. By default, only 5 migrations can run in parallel across the cluster, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64MiB/s.

    You can change these values in the kubevirt CR:

apiVersion: kubevirt.io/v1
kind: KubeVirt
metadata:
  name: kubevirt
  namespace: kubevirt
spec:
  configuration:
    migrations:
      parallelMigrationsPerCluster: 5
      parallelOutboundMigrationsPerNode: 2
      bandwidthPerMigration: 64Mi
      completionTimeoutPerGiB: 800
      progressTimeout: 150
      disableTLS: false
      nodeDrainTaintKey: "kubevirt.io/drain"
      allowAutoConverge: false          # related to: Auto-converge
      allowPostCopy: false              # related to: Post-copy
      unsafeMigrationOverride: false
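
One way to apply such a change is a merge patch against the KubeVirt CR (a sketch; the CR name and namespace are taken from the manifest above and may differ in your cluster, for example in a Harvester installation):

# Adjust the cluster-wide migration limits via a merge patch.
kubectl -n kubevirt patch kubevirt kubevirt --type merge -p '
spec:
  configuration:
    migrations:
      parallelMigrationsPerCluster: 5
      parallelOutboundMigrationsPerNode: 2
      bandwidthPerMigration: 64Mi
'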

    Remember that most of these configurations can be overridden and fine-tuned to a specified group of VMs. For more information, please refer to the Migration Policies section below.

    Migration Policies

Migration policies provide a new way of applying migration configurations to Virtual Machines. A policy can refine the KubeVirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that the migration policy can refine (i.e., change, remove, or add to).

    Remember that migration policies are in version v1alpha1. This means that this API is not fully stable yet and that APIs may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec only includes the following configurations from Kubevirt CR's MigrationConfiguration. (In the future, more configurations that aren't part of Kubevirt CR will be added):

apiVersion: migrations.kubevirt.io/v1alpha1
kind: MigrationPolicy
spec:
  allowAutoConverge: true
  bandwidthPerMigration: 217Ki
  completionTimeoutPerGiB: 23
  allowPostCopy: false

    All the above fields are optional. When omitted, the configuration will be applied as defined in KubevirtCR's MigrationConfiguration. This way, KubevirtCR will serve as a configurable set of defaults for both VMs that are not bound to any MigrationPolicy and VMs that are bound to a MigrationPolicy that does not define all fields of the configurations.

    Matching Policies to VMs

Next in the spec are the selectors that define the group of VMs to which the policy applies. The options to do so are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

apiVersion: migrations.kubevirt.io/v1alpha1
kind: MigrationPolicy
spec:
  selectors:
    namespaceSelector:
      hpc-workloads: true   # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

apiVersion: migrations.kubevirt.io/v1alpha1
kind: MigrationPolicy
spec:
  selectors:
    virtualMachineInstanceSelector:
      workload-type: db   # Matches a key and a value
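
Putting the configuration and selector fields together, a complete policy could look like the sketch below (the policy name is hypothetical; the field values are taken from the examples above, with the label values quoted as strings):

# Create a MigrationPolicy combining the configuration and selectors shown above.
cat <<'EOF' | kubectl apply -f -
apiVersion: migrations.kubevirt.io/v1alpha1
kind: MigrationPolicy
metadata:
  name: db-workloads-policy        # hypothetical name
spec:
  allowAutoConverge: true
  allowPostCopy: false
  completionTimeoutPerGiB: 23
  bandwidthPerMigration: 217Ki
  selectors:
    namespaceSelector:
      hpc-workloads: "true"
    virtualMachineInstanceSelector:
      workload-type: "db"
EOF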

    References

    Documents

    Libvirt Guest Migration

Libvirt has a chapter describing the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

The VM Live Migration-related configuration options are passed down to each layer accordingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

...
import "libvirt.org/go/libvirt"

...

func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
    ...
    if options.AllowAutoConverge {
        migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
    }
    if options.AllowPostCopy {
        migrateFlags |= libvirt.MIGRATE_POSTCOPY
    }
    ...
}

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

const (
    ...
    MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
    MIGRATE_RDMA_PIN_ALL  = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
    MIGRATE_POSTCOPY      = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
    ...
)

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

    /* Enable algorithms that ensure a live migration will eventually converge.
     * This usually means the domain will be slowed down to make sure it does
     * not change its memory faster than a hypervisor can transfer the changed
     * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
     * parameters can be used to tune the algorithm.
     *
     * Since: 1.2.3
     */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
     * migration. However, the migration will start normally and
     * virDomainMigrateStartPostCopy needs to be called to switch it into the
     * post-copy mode. See virDomainMigrateStartPostCopy for more details.
     *
     * Since: 1.3.3
     */
    VIR_MIGRATE_POSTCOPY = (1 << 15),
    - + \ No newline at end of file diff --git a/kb/tags/vm/index.html b/kb/tags/vm/index.html index ab466e2b..234abb01 100644 --- a/kb/tags/vm/index.html +++ b/kb/tags/vm/index.html @@ -9,7 +9,7 @@ 3 posts tagged with "VM" | The open-source hyperconverged infrastructure solution for a cloud-native world - + @@ -17,7 +17,7 @@

    3 posts tagged with "VM"

    View All Tags

    · 11 min read
    Jian Wang

    · 4 min read
    Date Huang

    What is the default behavior of a VM with multiple NICs

In some scenarios, you'll set up two or more NICs in your VM to serve different networking purposes. If all networks are set up with DHCP by default, you might get random connectivity issues. And while the issue might seem fixed after rebooting the VM, the VM will still lose connectivity randomly after some period.

How to identify connectivity issues

    In a Linux VM, you can use commands from the iproute2 package to identify the default route.

    In your VM, execute the following command:

    ip route show default
    tip

If you get an access denied error, run the command using sudo.

    The output of this command will only show the default route with the gateway and VM IP of the primary network interface (eth0 in the example below).

    default via <Gateway IP> dev eth0 proto dhcp src <VM IP> metric 100

    Here is the full example:

    $ ip route show default
    default via 192.168.0.254 dev eth0 proto dhcp src 192.168.0.100 metric 100

    However, if the issue covered in this KB occurs, you'll only be able to connect to the VM via the VNC or serial console.

Once connected, you can run the same command as before:

    $ ip route show default

    However, this time you'll get a default route with an incorrect gateway IP. For example:

    default via <Incorrect Gateway IP> dev eth0 proto dhcp src <VM's IP> metric 100

    Why do connectivity issues occur randomly

In a standard setup, cloud-based VMs typically use DHCP to configure their NICs. DHCP sets an IP and a gateway for each NIC. Lastly, a default route to the gateway IP is also added, so you can use that IP to connect to the VM.

However, Linux distributions start multiple DHCP clients at the same time and do not have a priority system. This means that if you have two or more NICs configured with DHCP, the clients will enter a race condition to configure the default route. And depending on the DHCP scripts of the running Linux distribution, there is no guarantee which default route will end up configured.

    As the default route might change in every DHCP renewing process or after every OS reboot, this will create network connectivity issues.

    How to avoid the random connectivity issues

    You can easily avoid these connectivity issues by having only one NIC attached to the VM and having only one IP and one gateway configured.

    However, for VMs in more complex infrastructures, it is often not possible to use just one NIC. For example, if your infrastructure has a storage network and a service network. For security reasons, the storage network will be isolated from the service network and have a separate subnet. In this case, you must have two NICs to connect to both the service and storage networks.

    You can choose a solution below that meets your requirements and security policy.

    Disable DHCP on secondary NIC

As mentioned above, the problem is caused by a race condition between two DHCP clients. One solution is to disable DHCP for all NICs and configure them with static IPs only. Alternatively, you can configure the secondary NIC with a static IP and keep the primary NIC enabled with DHCP.

    1. To configure the primary NIC with a static IP (eth0 in this example), you can edit the file /etc/sysconfig/network/ifcfg-eth0 with the following values:
    BOOTPROTO='static'
    IPADDR='192.168.0.100'
    NETMASK='255.255.255.0'

Alternatively, if you want to keep the primary NIC configured using DHCP (eth0 in this example), use the following values instead:

    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
2. Configure the default route by editing the file /etc/sysconfig/network/ifroute-eth0 (if you configured the primary NIC using DHCP, skip this step):
    # Destination  Dummy/Gateway  Netmask  Interface
    default 192.168.0.254 - eth0
    warning

Do not add another default route for your secondary NIC.

3. Finally, configure a static IP for the secondary NIC by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='static'
    IPADDR='10.0.0.100'
    NETMASK='255.255.255.0'

    Cloud-Init config

network:
  version: 1
  config:
    - type: physical
      name: eth0
      subnets:
        - type: dhcp
    - type: physical
      name: eth1
      subnets:
        - type: static
          address: 10.0.0.100/24

    Disable secondary NIC default route from DHCP

If your secondary NIC needs to get its IP from DHCP, you'll need to disable the default route configuration for that NIC.

    1. Confirm that the primary NIC configures its default route in the file /etc/sysconfig/network/ifcfg-eth0:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='yes'
2. Disable the secondary NIC default route configuration by editing the file /etc/sysconfig/network/ifcfg-eth1:
    BOOTPROTO='dhcp'
    DHCLIENT_SET_DEFAULT_ROUTE='no'
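
After the configuration is re-applied (for example, after a reboot), a quick check confirms that only the primary NIC provides the default route:

ip route show default    # expect a single default route via eth0's gateway
ip -4 addr show eth1     # eth1 keeps its DHCP address but adds no default route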

    Cloud-Init config

This solution is not available in Cloud-Init, because Cloud-Init does not expose any options for the DHCP configuration.

    · 16 min read
    PoAn Yang

    How does Harvester schedule a VM?

Harvester doesn't directly schedule a VM in Kubernetes; it relies on KubeVirt to create the custom resource VirtualMachine. When the request to create a new VM is sent, a VirtualMachineInstance object is created, which in turn creates the corresponding Pod.

The whole VM creation process leverages kube-scheduler, which allows Harvester to use nodeSelector, affinity, and resources request/limitation to influence where a VM will be deployed.
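
The resulting object chain can be observed with kubectl; virt-launcher pod names follow the virt-launcher-<vmi-name>-xxxxx pattern visible in the scheduler logs below (a sketch; demo-vm and the default namespace are hypothetical):

# VM -> VMI -> virt-launcher Pod created for it.
kubectl -n default get vm demo-vm
kubectl -n default get vmi demo-vm
kubectl -n default get pods | grep virt-launcher-demo-vm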

    How does kube-scheduler decide where to deploy a VM?

First, kube-scheduler finds the Nodes available to run a pod. After that, kube-scheduler scores each available Node using a list of plugins like ImageLocality, InterPodAffinity, NodeAffinity, etc.

Finally, kube-scheduler adds up the scores from the plugin results for each Node and selects the Node with the highest score to deploy the Pod.

For example, let's say we have a three-node Harvester cluster with 6 CPU cores and 16G RAM each, and we want to deploy a VM with 1 CPU and 1G RAM (without resource overcommit).

    kube-scheduler will summarize the scores, as displayed in Table 1 below, and will select the node with the highest score, harvester-node-2 in this case, to deploy the VM.

    kube-scheduler logs
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2"

    Table 1 - kube-scheduler scores example

|                                 | harvester-node-0 | harvester-node-1 | harvester-node-2 |
| ------------------------------- | ---------------- | ---------------- | ---------------- |
| ImageLocality                   | 54               | 54               | 54               |
| InterPodAffinity                | 0                | 0                | 0                |
| NodeResourcesLeastAllocated     | 4                | 34               | 37               |
| NodeAffinity                    | 0                | 0                | 0                |
| NodePreferAvoidPods             | 1000000          | 1000000          | 1000000          |
| PodTopologySpread               | 200              | 200              | 200              |
| TaintToleration                 | 100              | 100              | 100              |
| NodeResourcesBalancedAllocation | 0                | 45               | 46               |
| Total                           | 1000358          | 1000433          | 1000437          |
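
As a sanity check, the final score reported by kube-scheduler is simply the sum of the per-plugin scores in Table 1; for harvester-node-0:

# Sum of the harvester-node-0 column in Table 1.
echo $(( 54 + 0 + 4 + 0 + 1000000 + 200 + 100 + 0 ))   # prints 1000358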

Why are VMs distributed unevenly with overcommit?

With resource overcommit, Harvester modifies the resource requests. By default, the overcommit configuration is {"cpu": 1600, "memory": 150, "storage": 200}. This means that if we request a VM with 1 CPU and 1G RAM, its resources.requests.cpu will become 62m.

note

The unit suffix m stands for "thousandth of a core."

    To explain it, let's take the case of CPU overcommit. The default value of 1 CPU is equal to 1000m CPU, and with the default overcommit configuration of "cpu": 1600, the CPU resource will be 16x smaller. Here is the calculation: 1000m * 100 / 1600 = 62m.
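
The same arithmetic as a one-line sketch:

# 1 CPU requested = 1000m; default CPU overcommit = 1600 (percent).
echo $(( 1000 * 100 / 1600 ))    # prints 62 (millicores actually requested)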

    Now, we can see how overcommitting influences kube-scheduler scores.

    In this example, we use a three nodes Harvester cluster with 6 cores and 16G RAM each. We will deploy two VMs with 1 CPU and 1G RAM, and we will compare the scores for both cases of "with-overcommit" and "without-overcommit" resources.

The results of both Table 2 and Table 3 can be explained as follows:

    In the "with-overcommit" case, both VMs are deployed on harvester-node-2, however in the "without-overcommit" case, the VM1 is deployed on harvester-node-2, and VM2 is deployed on harvester-node-1.

    If we look at the detailed scores, we'll see a variation of Total Score for harvester-node-2 from 1000459 to 1000461 in the "with-overcommit" case, and 1000437 to 1000382 in the "without-overcommit case". It's because resources overcommit influences request-cpu and request-memory.

    In the "with-overcommit" case, the request-cpu changes from 4412m to 4474m. The difference between the two numbers is 62m, which is what we calculated above. However, in the "without-overcommit" case, we send real requests to kube-scheduler, so the request-cpu changes from 5350m to 6350m.

Finally, since most plugins give the same scores for each node except NodeResourcesBalancedAllocation and NodeResourcesLeastAllocated, we'll see a difference in these two scores for each node.

From the results, we can see the overcommit feature influences the final score of each Node, so VMs are distributed unevenly. Although the harvester-node-2 score for VM 2 is higher than for VM 1, it does not always keep increasing. In Table 4, we keep deploying VMs with 1 CPU and 1G RAM, and we can see the score of harvester-node-2 starts decreasing from the 11th VM. The behavior of kube-scheduler depends on your cluster resources and the workloads you deploy.

    kube-scheduler logs for vm1-with-overcommit
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 59,

    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 46,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=59

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2" score=1000459

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2"
    kube-scheduler logs for vm2-with-overcommit
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 64,

    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 43,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=64

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=43

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2" score=1000461

    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2"
    kube-scheduler logs for vm1-without-overcommit
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2"
    kube-scheduler logs for vm2-without-overcommit
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 0,

    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 28,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=28

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-2" score=1000382

    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1"
    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1"

    Table 2 - With Overcommit

    VM 1 / VM 2                           | harvester-node-0          | harvester-node-1        | harvester-node-2
    request-cpu (m)                       | 9022 / 9022               | 4622 / 4622             | 4412 / 4474
    request-memory                        | 14807289856 / 14807289856 | 5992960000 / 5992960000 | 5581918208 / 6476701696
    NodeResourcesBalancedAllocation Score | 0 / 0                     | 58 / 58                 | 59 / 64
    NodeResourcesLeastAllocated Score     | 5 / 5                     | 43 / 43                 | 46 / 43
    Other Scores                          | 1000354 / 1000354         | 1000354 / 1000354       | 1000354 / 1000354
    Total Score                           | 1000359 / 1000359         | 1000455 / 1000455       | 1000459 / 1000461

    Table 3 - Without Overcommit

    VM 1 / VM 2                           | harvester-node-0          | harvester-node-1        | harvester-node-2
    request-cpu (m)                       | 9960 / 9960               | 5560 / 5560             | 5350 / 6350
    request-memory                        | 15166603264 / 15166603264 | 6352273408 / 6352273408 | 5941231616 / 7195328512
    NodeResourcesBalancedAllocation Score | 0 / 0                     | 45 / 45                 | 46 / 0
    NodeResourcesLeastAllocated Score     | 4 / 4                     | 34 / 34                 | 37 / 28
    Other Scores                          | 1000354 / 1000354         | 1000354 / 1000354       | 1000354 / 1000354
    Total Score                           | 1000358 / 1000358         | 1000433 / 1000433       | 1000437 / 1000382

    Table 4

    Score | harvester-node-0 | harvester-node-1 | harvester-node-2
    VM 1  | 1000359          | 1000455          | 1000459
    VM 2  | 1000359          | 1000455          | 1000461
    VM 3  | 1000359          | 1000455          | 1000462
    VM 4  | 1000359          | 1000455          | 1000462
    VM 5  | 1000359          | 1000455          | 1000463
    VM 6  | 1000359          | 1000455          | 1000465
    VM 7  | 1000359          | 1000455          | 1000466
    VM 8  | 1000359          | 1000455          | 1000467
    VM 9  | 1000359          | 1000455          | 1000469
    VM 10 | 1000359          | 1000455          | 1000469
    VM 11 | 1000359          | 1000455          | 1000465
    VM 12 | 1000359          | 1000455          | 1000457

    How to avoid uneven distribution of VMs?

    kube-scheduler has many plugins that we can use to influence the scores. For example, we can add a podAntiAffinity rule so that VMs with the same label are not deployed on the same node.

    affinity:
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
        - podAffinityTerm:
            labelSelector:
              matchExpressions:
              - key: harvesterhci.io/creator
                operator: Exists
            topologyKey: kubernetes.io/hostname
          weight: 100
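
    If you add this rule manually, it goes under spec.template.spec.affinity of the VirtualMachine object (placement assumed from the KubeVirt VirtualMachine schema rather than taken from this article).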

    How to see scores in kube-scheduler?

    kube-scheduler is deployed as a static pod in Harvester. Its manifest is located at /var/lib/rancher/rke2/agent/pod-manifests/kube-scheduler.yaml on each management node. We can add - --v=10 to the kube-scheduler container command to enable verbose logging, which includes the score logs.

    kind: Pod
    metadata:
      labels:
        component: kube-scheduler
        tier: control-plane
      name: kube-scheduler
      namespace: kube-system
    spec:
      containers:
      - command:
        - kube-scheduler
        # ...
        - --v=10
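
    Once the flag is added, the kubelet restarts the static pod automatically. A quick way to read the score lines afterwards, assuming the usual kube-scheduler-<node-name> mirror pod naming in kube-system, is:

    kubectl -n kube-system logs kube-scheduler-harvester-node-0 | grep "Plugin scored node for pod"
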
    - + \ No newline at end of file diff --git a/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility/index.html b/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility/index.html index f433fdec..9b82cc25 100644 --- a/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility/index.html +++ b/kb/upgrading_guest_clusters_with_harvester_ip_pool_compatibility/index.html @@ -9,13 +9,13 @@ Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Upgrade Guest Kubernetes Clusters to be Compatible with Harvester IP Pools

    · 3 min read
    Canwu Yao

    With the release of Harvester v1.2.0, the new Harvester cloud provider version 0.2.2 is integrated into RKE2 v1.24.15+rke2r1, v1.25.11+rke2r1, v1.26.6+rke2r1, v1.27.3+rke2r1, and newer versions.

    With Harvester v1.2.0, the new Harvester cloud provider offers enhanced load balancing capabilities for guest Kubernetes services. Specifically, it introduces the Harvester IP Pool feature, a built-in IP address management (IPAM) solution for the Harvester load balancer. It allows you to define an IP pool specific to a particular guest cluster by specifying the guest cluster name. For example, you can create an IP pool exclusively for the guest cluster named cluster2:

    image

    However, after upgrading, the feature is not automatically compatible with existing guest Kubernetes clusters, because they do not pass the correct cluster name to the Harvester cloud provider. Refer to issue 4232 for more details. As a workaround, users can manually upgrade the Harvester cloud provider with Helm and provide the correct cluster name, but this results in a change of the load balancer IPs.

    This article outlines a workaround that allows you to leverage the new IP pool feature while keeping the load balancer IPs unchanged.

    Prerequisites

    • Download the Harvester kubeconfig file from the Harvester UI. If you have imported Harvester into Rancher, do not use the kubeconfig file from the Rancher UI. Refer to Access Harvester Cluster to get the desired one.

    • Download the kubeconfig file for the guest Kubernetes cluster you plan to upgrade. Refer to Accessing Clusters with kubectl from Your Workstation for instructions on how to download the kubeconfig file.

    Steps to Keep Load Balancer IP

    1. Execute the following script before upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s before_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>
      • <Harvester-kubeconfig-path>: Path to the Harvester kubeconfig file.
      • <guest-cluster-kubeconfig-path>: Path to the kubeconfig file of your guest Kubernetes cluster.
      • <guest-cluster-name>: Name of your guest cluster.
      • <guest-cluster-nodes-namespace>: Namespace where the VMs of the guest cluster are located.

      The script copies the DHCP information into the service annotations and modifies the allocated history of the IP pool so that the IP addresses remain unchanged.

      image

      After executing the script, the load balancer service with DHCP mode will be annotated with the DHCP information. For example:

      apiVersion: v1
      kind: Service
      metadata:
        annotations:
          kube-vip.io/hwaddr: 00:00:6c:4f:18:68
          kube-vip.io/requestedIP: 172.19.105.215
        name: lb0
        namespace: default

      For a load balancer service in pool mode, the allocated history of the IP pool is updated to reference the new load balancer name. For example:

      apiVersion: loadbalancer.harvesterhci.io/v1beta1
      kind: IPPool
      metadata:
        name: default
      spec:
        ...
      status:
        allocatedHistory:
          192.168.100.2: default/cluster-name-default-lb1-ddc13071 # replaced with the new load balancer name
    2. Add a network selector for the pool.

      For example, the following cluster is on the VM network default/mgmt-untagged, so the network selector should be default/mgmt-untagged.

      image

      image

    3. Upgrade the RKE2 cluster in the Rancher UI and select the new version.

      image

    4. Execute the script after upgrading.

      curl -sfL https://raw.githubusercontent.com/harvester/harvesterhci.io/main/kb/2023-08-21/keepip.sh | sh -s after_upgrade <Harvester-kubeconfig-path> <guest-cluster-kubeconfig-path> <guest-cluster-name> <guest-cluster-nodes-namespace>

      image

      In this step, the script wraps the operations that upgrade the Harvester cloud provider and set the cluster name. Once the Harvester cloud provider is running, the new Harvester load balancers are created with the unchanged IPs.
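
      As a quick sanity check (not part of the script), you can list the LoadBalancer services in the guest cluster before and after the upgrade and compare their external IPs:

      kubectl --kubeconfig <guest-cluster-kubeconfig-path> get services -A | grep LoadBalancer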

    - + \ No newline at end of file diff --git a/kb/use_rook_ceph_external_storage/index.html b/kb/use_rook_ceph_external_storage/index.html index f3459d6d..b6411490 100644 --- a/kb/use_rook_ceph_external_storage/index.html +++ b/kb/use_rook_ceph_external_storage/index.html @@ -9,13 +9,13 @@ Use Rook Ceph External Storage with Harvester | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Use Rook Ceph External Storage with Harvester

    · 4 min read
    Hang Yu

    Starting with v1.2.0, Harvester offers the capability to install a Container Storage Interface (CSI) driver in your Harvester cluster. This allows you to use external storage for a virtual machine's non-system data disks, giving you the flexibility to choose drivers tailored to specific needs, whether for performance optimization or seamless integration with your existing in-house storage solutions.

    It's important to note that, despite this enhancement, the provisioner for the Virtual Machine (VM) image in Harvester still relies on Longhorn. Prior to version 1.2.0, Harvester exclusively supported Longhorn for storing VM data and did not offer support for external storage as a destination for VM data.

    One of the options for integrating external storage with Harvester is Rook, an open-source cloud-native storage orchestrator. Rook provides a robust platform, framework, and support for Ceph storage, enabling seamless integration with cloud-native environments.

    Ceph is a software-defined distributed storage system that offers versatile storage capabilities, including file, block, and object storage. It is designed for large-scale production clusters and can be deployed effectively in such environments.

    Rook simplifies the deployment and management of Ceph, offering self-managing, self-scaling, and self-healing storage services. It leverages Kubernetes resources to automate the deployment, configuration, provisioning, scaling, upgrading, and monitoring of Ceph.

    In this article, we will walk you through the process of installing, configuring, and utilizing Rook to use storage from an existing external Ceph cluster as a data disk for a VM within the Harvester environment.

    Install Harvester Cluster

    Harvester's operating system follows an immutable design, meaning that most OS files revert to their pre-configured state after a reboot. To accommodate Rook Ceph's requirements, you need to add specific persistent paths to the os.persistent_state_paths section of the Harvester configuration and load the rbd and nbd kernel modules through os.modules:

    os:
      persistent_state_paths:
      - /var/lib/rook
      - /var/lib/ceph
      modules:
      - rbd
      - nbd

    After the cluster is installed, refer to How can I access the kubeconfig file of the Harvester cluster? to get the kubeconfig of the Harvester cluster.
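
    For example, assuming the kubeconfig is saved as harvester.yaml, you can point kubectl at it and verify access:

    export KUBECONFIG=$PWD/harvester.yaml
    kubectl get nodes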

    Install Rook on Harvester

    Install Rook on the Harvester cluster by referring to the Rook Quickstart.

    curl -fsSLo rook.tar.gz https://github.com/rook/rook/archive/refs/tags/v1.12.2.tar.gz \
    && tar -zxf rook.tar.gz && cd rook-1.12.2/deploy/examples
    # apply configurations ref: https://rook.github.io/docs/rook/v1.12/Getting-Started/example-configurations/
    kubectl apply -f crds.yaml -f common.yaml -f operator.yaml
    kubectl -n rook-ceph wait --for=condition=Available deploy rook-ceph-operator --timeout=10m

    Using an existing external Ceph cluster

    1. Run the Python script create-external-cluster-resources.py on the existing external Ceph cluster to create all required users and keys.
    # script help ref: https://www.rook.io/docs/rook/v1.12/CRDs/Cluster/external-cluster/#1-create-all-users-and-keys
    curl -s https://raw.githubusercontent.com/rook/rook/v1.12.2/deploy/examples/create-external-cluster-resources.py > create-external-cluster-resources.py
    python3 create-external-cluster-resources.py --rbd-data-pool-name <pool_name> --namespace rook-ceph-external --format bash
    2. Copy the Bash output.

    Example output:

    export NAMESPACE=rook-ceph-external
    export ROOK_EXTERNAL_FSID=b3b47828-4c60-11ee-be38-51902f85c805
    export ROOK_EXTERNAL_USERNAME=client.healthchecker
    export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-1=192.168.5.99:6789
    export ROOK_EXTERNAL_USER_SECRET=AQDd6/dkFyu/IhAATv/uCMbHtWk4AYK2KXzBhQ==
    export ROOK_EXTERNAL_DASHBOARD_LINK=https://192.168.5.99:8443/
    export CSI_RBD_NODE_SECRET=AQDd6/dk2HsjIxAA06Yw9UcOg0dfwV/9IFBRhA==
    export CSI_RBD_NODE_SECRET_NAME=csi-rbd-node
    export CSI_RBD_PROVISIONER_SECRET=AQDd6/dkEY1kIxAAAzrXZnVRf4x+wDUz1zyaQg==
    export CSI_RBD_PROVISIONER_SECRET_NAME=csi-rbd-provisioner
    export MONITORING_ENDPOINT=192.168.5.99
    export MONITORING_ENDPOINT_PORT=9283
    export RBD_POOL_NAME=test
    export RGW_POOL_PREFIX=default
    3. Consume the external Ceph cluster resources on the Harvester cluster.
    # Paste the above output from create-external-cluster-resources.py into import-env.sh
    vim import-env.sh
    source import-env.sh
    # this script will create a StorageClass ceph-rbd
    source import-external-cluster.sh
    kubectl apply -f common-external.yaml
    kubectl apply -f cluster-external.yaml
    # wait for all pods to become Ready
    watch 'kubectl --namespace rook-ceph get pods'
    4. Create the VolumeSnapshotClass csi-rbdplugin-snapclass-external.
    cat >./csi/rbd/snapshotclass-external.yaml <<EOF
    ---
    apiVersion: snapshot.storage.k8s.io/v1
    kind: VolumeSnapshotClass
    metadata:
      name: csi-rbdplugin-snapclass-external
    driver: rook-ceph.rbd.csi.ceph.com # driver:namespace:operator
    parameters:
      clusterID: rook-ceph-external # namespace:cluster
      csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
      csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external # namespace:cluster
    deletionPolicy: Delete
    EOF

    kubectl apply -f ./csi/rbd/snapshotclass-external.yaml
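
    Before moving on, you can run a quick sanity check (not part of the upstream steps; the resource names are the ones created above) to confirm that the external cluster resources exist:

    kubectl get storageclass ceph-rbd
    kubectl get volumesnapshotclass csi-rbdplugin-snapclass-external
    kubectl -n rook-ceph-external get cephcluster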

    Configure Harvester Cluster

    Before you can use Harvester's Backup & Snapshot features, you need to configure some essential options through the Harvester csi-driver-config setting. To do so, follow these steps:

    1. Login to the Harvester UI, then navigate to Advanced > Settings.
    2. Find csi-driver-config, and then click ⋮ > Edit Setting to access the configuration options.
    3. In the settings, set the Provisioner to rook-ceph.rbd.csi.ceph.com.
    4. Next, specify the Volume Snapshot Class Name as csi-rbdplugin-snapclass-external. This setting points to the name of the VolumeSnapshotClass used for creating volume snapshots or VM snapshots.
    5. Similarly, set the Backup Volume Snapshot Class Name to csi-rbdplugin-snapclass-external. This corresponds to the name of the VolumeSnapshotClass responsible for creating VM backups.
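
    For reference, the three fields above end up as an entry in the csi-driver-config setting value that is roughly equivalent to the following JSON (the key names are assumed from the setting's default Longhorn entry, so verify them against your Harvester version):

    {
      "rook-ceph.rbd.csi.ceph.com": {
        "volumeSnapshotClassName": "csi-rbdplugin-snapclass-external",
        "backupVolumeSnapshotClassName": "csi-rbdplugin-snapclass-external"
      }
    }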

    csi-driver-config-external

    Use Rook Ceph in Harvester

    After these settings are configured, you can use the Rook Ceph StorageClass: rook-ceph-block for an internal Ceph cluster, or ceph-rbd for the external Ceph cluster described here. Apply this StorageClass when creating an empty volume or adding a new block volume to a VM.

    With these configurations in place, your Harvester cluster is ready to make the most of the Rook Ceph storage integration.
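
    As an illustration only (the PVC name and size are placeholders, not from this article), a block-mode volume that consumes the external pool through the ceph-rbd StorageClass could look like the following, and can then be attached to a VM as a data disk:

    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: vm-data-ceph
      namespace: default
    spec:
      accessModes:
      - ReadWriteOnce
      volumeMode: Block
      resources:
        requests:
          storage: 10Gi
      storageClassName: ceph-rbd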

    rook-ceph-volume-external

    rook-ceph-vm-external

    - + \ No newline at end of file diff --git a/kb/vm-scheduling/index.html b/kb/vm-scheduling/index.html index f88f0d0c..53beeecc 100644 --- a/kb/vm-scheduling/index.html +++ b/kb/vm-scheduling/index.html @@ -9,14 +9,14 @@ VM Scheduling | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    VM Scheduling

    · 16 min read
    PoAn Yang

    How does Harvester schedule a VM?

    Harvester doesn't schedule a VM in Kubernetes directly; it relies on KubeVirt and creates a VirtualMachine custom resource. When the request to create a new VM is sent, a VirtualMachineInstance object is created, which in turn creates the corresponding Pod.

    The whole VM creation process leverages kube-scheduler, which allows Harvester to use nodeSelector, affinity, and resource requests/limits to influence where a VM will be deployed.
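
    For example, a nodeSelector set on the VM ends up on the pod that KubeVirt creates. A minimal sketch of the relevant part of a VirtualMachine object (the label key and value are placeholders) looks like this:

    # Fragment of a VirtualMachine (kubevirt.io/v1); only the scheduling-related field is shown.
    spec:
      template:
        spec:
          nodeSelector:
            topology.kubernetes.io/zone: zone-1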

    How does kube-scheduler decide where to deploy a VM?

    First, kube-scheduler finds Nodes available to run a pod. After that, kube-scheduler scores each available Node by a list of plugins like ImageLocality, InterPodAffinity, NodeAffinity, etc.

    Finally, kube-scheduler calculates the scores from the plugin results for each Node and selects the Node with the highest score to deploy the Pod.

    For example, let's say we have a three-node Harvester cluster with 6 CPU cores and 16 GB of RAM each, and we want to deploy a VM with 1 CPU and 1G RAM (without resource overcommit).

    kube-scheduler will summarize the scores, as displayed in Table 1 below, and will select the node with the highest score, harvester-node-2 in this case, to deploy the VM.

    kube-scheduler logs
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm-without-overcommit-75q9b -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm-without-overcommit-75q9b", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm-without-overcommit-75q9b" node="harvester-node-2"

    Table 1 - kube-scheduler scores example

    Plugin                          | harvester-node-0 | harvester-node-1 | harvester-node-2
    ImageLocality                   | 54               | 54               | 54
    InterPodAffinity                | 0                | 0                | 0
    NodeResourcesLeastAllocated     | 4                | 34               | 37
    NodeAffinity                    | 0                | 0                | 0
    NodePreferAvoidPods             | 1000000          | 1000000          | 1000000
    PodTopologySpread               | 200              | 200              | 200
    TaintToleration                 | 100              | 100              | 100
    NodeResourcesBalancedAllocation | 0                | 45               | 46
    Total                           | 1000358          | 1000433          | 1000437
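
    Note that the Total row is simply the sum of the per-plugin scores logged for each node. For harvester-node-2, for example:

    echo $(( 54 + 0 + 37 + 0 + 1000000 + 200 + 100 + 46 ))   # prints 1000437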

    Why are VMs distributed unevenly with overcommit?

    With resource overcommit, Harvester modifies the resource requests. By default, the overcommit configuration is {"cpu": 1600, "memory": 150, "storage": 200}. This means that if we request a VM with 1 CPU and 1G RAM, its resources.requests.cpu will become 62m.

    Note: The unit suffix m stands for "thousandth of a core."

    To explain it, let's take the case of CPU overcommit. 1 CPU is equal to 1000m CPU, and with the default overcommit configuration of "cpu": 1600, the requested CPU becomes 16 times smaller. Here is the calculation: 1000m * 100 / 1600 = 62m.
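
    The same calculation expressed in shell (the variable names are just for illustration):

    requested_cpu_m=1000        # 1 CPU expressed in millicores
    cpu_overcommit_ratio=1600   # default "cpu" value in the overcommit configuration
    echo "$(( requested_cpu_m * 100 / cpu_overcommit_ratio ))m"   # prints 62m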

    Now, we can see how overcommitting influences kube-scheduler scores.

    In this example, we use a three-node Harvester cluster with 6 CPU cores and 16 GB of RAM each. We will deploy two VMs with 1 CPU and 1G RAM and compare the scores for the "with-overcommit" and "without-overcommit" cases.

    The results in Table 2 and Table 3 can be explained as follows:

    In the "with-overcommit" case, both VMs are deployed on harvester-node-2. In the "without-overcommit" case, however, VM 1 is deployed on harvester-node-2 and VM 2 is deployed on harvester-node-1.

    Looking at the detailed scores, the Total Score of harvester-node-2 changes from 1000459 to 1000461 in the "with-overcommit" case, and from 1000437 to 1000382 in the "without-overcommit" case. This is because resource overcommit influences request-cpu and request-memory.

    In the "with-overcommit" case, the request-cpu changes from 4412m to 4474m. The difference between the two numbers is 62m, which is exactly what we calculated above. In the "without-overcommit" case, however, we send the real requests to kube-scheduler, so the request-cpu changes from 5350m to 6350m.

    Finally, since most plugins give the same scores to every node, only the NodeResourcesBalancedAllocation and NodeResourcesLeastAllocated scores differ between nodes.

    From the results, we can see that the overcommit feature influences the final score of each node, so VMs are distributed unevenly. Although the harvester-node-2 score for VM 2 is higher than for VM 1, it doesn't always keep increasing. In Table 4, we keep deploying VMs with 1 CPU and 1G RAM, and the score of harvester-node-2 starts decreasing from the 11th VM. The behavior of kube-scheduler depends on your cluster resources and the workloads you have deployed.

    kube-scheduler logs for vm1-with-overcommit
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 59,

    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm1-with-overcommit-ljlmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4412 memory:5581918208] ,score 46,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=59

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2" score=1000459

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-ljlmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-ljlmq" node="harvester-node-2"
    kube-scheduler logs for vm2-with-overcommit
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 0,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 58,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 64,

    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9022 memory:14807289856] ,score 5,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4622 memory:5992960000] ,score 43,
    virt-launcher-vm2-with-overcommit-pwrx4 -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:4474 memory:6476701696] ,score 43,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=58
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=64

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=5
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=43
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=43

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-0" score=1000359
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-1" score=1000455
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2" score=1000461

    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm2-with-overcommit-pwrx4", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-with-overcommit-pwrx4" node="harvester-node-2"
    kube-scheduler logs for vm1-without-overcommit
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 46,

    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm1-with-overcommit-6xqmq -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5350 memory:5941231616] ,score 37,

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=37

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=46

    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" plugin="ImageLocality" node="harvester-node-2" score=54

    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2" score=1000437

    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2"
    AssumePodVolumes for pod "default/virt-launcher-vm1-with-overcommit-6xqmq", node "harvester-node-2": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm1-with-overcommit-6xqmq" node="harvester-node-2"
    kube-scheduler logs for vm2-without-overcommit
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 0,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 45,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesBalancedAllocation, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 0,

    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-0: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:9960 memory:15166603264] ,score 4,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-1: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:5560 memory:6352273408] ,score 34,
    virt-launcher-vm2-without-overcommit-mf5vk -> harvester-node-2: NodeResourcesLeastAllocated, map of allocatable resources map[cpu:6000 memory:16776437760], map of requested resources map[cpu:6350 memory:7195328512] ,score 28,

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-0" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-1" score=200
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="PodTopologySpread" node="harvester-node-2" score=200

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-0" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-1" score=100
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="TaintToleration" node="harvester-node-2" score=100

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-1" score=45
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesBalancedAllocation" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-0" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-1" score=54
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="ImageLocality" node="harvester-node-2" score=54

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="InterPodAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-0" score=4
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-1" score=34
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeResourcesLeastAllocated" node="harvester-node-2" score=28

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-0" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-1" score=0
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodeAffinity" node="harvester-node-2" score=0

    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-0" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-1" score=1000000
    "Plugin scored node for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" plugin="NodePreferAvoidPods" node="harvester-node-2" score=1000000

    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-0" score=1000358
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1" score=1000433
    "Calculated node's final score for pod" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-2" score=1000382

    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1"
    AssumePodVolumes for pod "default/virt-launcher-vm2-without-overcommit-mf5vk", node "harvester-node-1": all PVCs bound and nothing to do
    "Attempting to bind pod to node" pod="default/virt-launcher-vm2-without-overcommit-mf5vk" node="harvester-node-1"

    Table 2 - With Overcommit

    | VM 1 / VM 2 | harvester-node-0 | harvester-node-1 | harvester-node-2 |
    | --- | --- | --- | --- |
    | request-cpu (m) | 9022 / 9022 | 4622 / 4622 | 4412 / 4474 |
    | request-memory | 14807289856 / 14807289856 | 5992960000 / 5992960000 | 5581918208 / 6476701696 |
    | NodeResourcesBalancedAllocation Score | 0 / 0 | 58 / 58 | 59 / 64 |
    | NodeResourcesLeastAllocated Score | 5 / 5 | 43 / 43 | 46 / 43 |
    | Other Scores | 1000354 / 1000354 | 1000354 / 1000354 | 1000354 / 1000354 |
    | Total Score | 1000359 / 1000359 | 1000455 / 1000455 | 1000459 / 1000461 |

    Table 3 - Without Overcommit

    | VM 1 / VM 2 | harvester-node-0 | harvester-node-1 | harvester-node-2 |
    | --- | --- | --- | --- |
    | request-cpu (m) | 9960 / 9960 | 5560 / 5560 | 5350 / 6350 |
    | request-memory | 15166603264 / 15166603264 | 6352273408 / 6352273408 | 5941231616 / 7195328512 |
    | NodeResourcesBalancedAllocation Score | 0 / 0 | 45 / 45 | 46 / 0 |
    | NodeResourcesLeastAllocated Score | 4 / 4 | 34 / 34 | 37 / 28 |
    | Other Scores | 1000354 / 1000354 | 1000354 / 1000354 | 1000354 / 1000354 |
    | Total Score | 1000358 / 1000358 | 1000433 / 1000433 | 1000437 / 1000382 |

    Table 4

    | Score | harvester-node-0 | harvester-node-1 | harvester-node-2 |
    | --- | --- | --- | --- |
    | VM 1 | 1000359 | 1000455 | 1000459 |
    | VM 2 | 1000359 | 1000455 | 1000461 |
    | VM 3 | 1000359 | 1000455 | 1000462 |
    | VM 4 | 1000359 | 1000455 | 1000462 |
    | VM 5 | 1000359 | 1000455 | 1000463 |
    | VM 6 | 1000359 | 1000455 | 1000465 |
    | VM 7 | 1000359 | 1000455 | 1000466 |
    | VM 8 | 1000359 | 1000455 | 1000467 |
    | VM 9 | 1000359 | 1000455 | 1000469 |
    | VM 10 | 1000359 | 1000455 | 1000469 |
    | VM 11 | 1000359 | 1000455 | 1000465 |
    | VM 12 | 1000359 | 1000455 | 1000457 |

    How to avoid uneven distribution of VMs?

    kube-scheduler has many plugins that we can use to influence the scores. For example, we can add a podAntiAffinity rule (scored by the InterPodAffinity plugin) so that VMs carrying the same label are not packed onto the same node.

    affinity:
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
        - podAffinityTerm:
            labelSelector:
              matchExpressions:
              - key: harvesterhci.io/creator
                operator: Exists
            topologyKey: kubernetes.io/hostname
          weight: 100
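
    In a Harvester/KubeVirt VirtualMachine object, this affinity stanza normally sits under spec.template.spec. The snippet below is only a minimal sketch: the VM name is illustrative and the rest of the VM spec is omitted.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachine
    metadata:
      name: vm-example               # illustrative name
    spec:
      template:
        spec:
          # soft anti-affinity: prefer spreading Harvester-created VMs across nodes
          affinity:
            podAntiAffinity:
              preferredDuringSchedulingIgnoredDuringExecution:
              - weight: 100
                podAffinityTerm:
                  topologyKey: kubernetes.io/hostname
                  labelSelector:
                    matchExpressions:
                    - key: harvesterhci.io/creator
                      operator: Exists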

    How to see scores in kube-scheduler?

    kube-scheduler is deployed as a static pod in Harvester. The manifest file is located at /var/lib/rancher/rke2/agent/pod-manifests/kube-scheduler.yaml on each management node. We can add - --v=10 to the kube-scheduler container command to make it log the scores.

    kind: Pod
    metadata:
      labels:
        component: kube-scheduler
        tier: control-plane
      name: kube-scheduler
      namespace: kube-system
    spec:
      containers:
      - command:
        - kube-scheduler
        # ...
        - --v=10
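
    Once kubelet picks up the modified manifest and restarts the static pod, the per-plugin and final scores should appear in the scheduler logs. A sketch of how to view them, assuming a node named harvester-node-0 (the mirror pod name includes the node name):

    kubectl -n kube-system get pods -l component=kube-scheduler
    kubectl -n kube-system logs kube-scheduler-harvester-node-0 | grep "score"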
    - + \ No newline at end of file diff --git a/kb/vm_live_migration_policy_and_configuration/index.html b/kb/vm_live_migration_policy_and_configuration/index.html index 83653d93..7b68fdd7 100644 --- a/kb/vm_live_migration_policy_and_configuration/index.html +++ b/kb/vm_live_migration_policy_and_configuration/index.html @@ -9,13 +9,13 @@ VM Live Migration Policy and Configuration | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    VM Live Migration Policy and Configuration

    · 11 min read
    Jian Wang

    In Harvester, VM live migration is well supported by the UI. Refer to Harvester VM Live Migration for more details.

    The live migration process finishes smoothly in most cases. However, sometimes the migration gets stuck and does not end as expected.

    This article dives into the VM Live Migration process in more detail. There are three main parts:

    • General Process of VM Live Migration
    • VM Live Migration Strategies
    • VM Live Migration Configurations

    Related issues:

    note

    A large part of the following content is copied from the KubeVirt documentation https://kubevirt.io/user-guide/operations/live_migration/; some content and formatting has been adjusted to fit this document.

    General Process of VM Live Migration

    Starting a Migration from Harvester UI

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select > Migrate.
    3. Choose the node to which you want to migrate the virtual machine and select Apply.

    After you select Apply, a VirtualMachineInstanceMigration CRD object is created, and the related controller/operator starts the migration process.

    Migration CRD Object

    You can also create the CRD VirtualMachineInstanceMigration object manually via kubectl or other tools.

    The example below starts a migration process for a virtual machine instance (VMI) new-vm.

    apiVersion: kubevirt.io/v1
    kind: VirtualMachineInstanceMigration
    metadata:
      name: migration-job
    spec:
      vmiName: new-vm
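
    Assuming the manifest above is saved as migration-job.yaml, it can be applied and then watched with kubectl; a minimal sketch:

    kubectl apply -f migration-job.yaml
    kubectl get virtualmachineinstancemigration migration-job -o yaml   # check status.phase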

    Under the hood, the open-source projects KubeVirt, Libvirt, QEMU, and others perform most of the VM live migration work. See the References section below.

    Migration Status Reporting

    When a virtual machine instance (VMI) starts, it is also calculated whether the machine is live migratable. The result is stored in VMI.status.conditions. The calculation can be based on multiple parameters of the VMI; however, at the moment, it is largely based on the access mode of the VMI volumes. Live migration is only permitted when the volume access mode is set to ReadWriteMany. Requests to migrate a non-live-migratable VMI are rejected.

    The reported Migration Method is also calculated when the VMI starts. BlockMigration indicates that some of the VMI disks require copying from the source to the destination. LiveMigration means that only the instance memory will be copied.

    Status:
      Conditions:
        Status: True
        Type: LiveMigratable
      Migration Method: BlockMigration

    Migration Status

    The migration progress status is reported in VMI.status. Most importantly, it indicates whether the migration has been completed or failed.

    Below is an example of a successful migration.

    Migration State:
      Completed: true
      End Timestamp: 2019-03-29T03:37:52Z
      Migration Config:
        Completion Timeout Per GiB: 800
        Progress Timeout: 150
      Migration UID: c64d4898-51d3-11e9-b370-525500d15501
      Source Node: node02
      Start Timestamp: 2019-03-29T04:02:47Z
      Target Direct Migration Node Ports:
        35001: 0
        41068: 49152
        38284: 49153
      Target Node: node01
      Target Node Address: 10.128.0.46
      Target Node Domain Detected: true
      Target Pod: virt-launcher-testvmimcbjgw6zrzcmp8wpddvztvzm7x2k6cjbdgktwv8tkq
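
    On a running cluster, the same state can be inspected directly from the VMI object. A sketch, assuming a VMI named new-vm in the default namespace:

    kubectl -n default get vmi new-vm -o jsonpath='{.status.migrationState}'
    kubectl -n default get vmi new-vm -o jsonpath='{.status.conditions}'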

    VM Live Migration Strategies

    VM Live Migration is a process during which a running Virtual Machine Instance moves to another compute node while the guest workload continues to run and remain accessible.

    Understanding Different VM Live Migration Strategies

    VM live migration is a complex process. During a migration, the source VM needs to transfer its whole state (mainly RAM) to the target VM. If there are enough resources available, such as network bandwidth and CPU power, the migration should converge nicely. If this is not the case, however, the migration might get stuck without the ability to progress.

    The main factor that affects a migration from the guest's perspective is its dirty rate, the rate at which the VM dirties memory. A guest with a high dirty rate leads to a race during migration: on the one hand, memory is transferred continuously to the target; on the other hand, the same memory keeps getting dirtied by the guest. In such scenarios, consider using a more advanced migration strategy. Refer to Understanding different migration strategies for more details.

    There are 3 VM Live Migration strategies/policies:

    VM Live Migration Strategy: Pre-copy

    Pre-copy is the default strategy. It should be used for most cases.

    It works as follows:

    1. The target VM is created, but the guest keeps running on the source VM.
    2. The source starts sending chunks of VM state (mostly memory) to the target. This continues until all of the state has been transferred to the target.
    3. The guest starts executing on the target VM.
    4. The source VM is removed.

    Pre-copy is the safest and fastest strategy for most cases. Furthermore, it can be easily cancelled, can utilize multithreading, and more. If there is no real reason to use another strategy, this is definitely the strategy to go with.

    However, in some cases migrations might not converge easily: by the time a chunk of source VM state is received by the target VM, it has already been mutated by the source VM (which is where the guest is still executing). There are many reasons for a migration to fail to converge, such as a high dirty rate or scarce resources like network bandwidth and CPU. In such scenarios, see the alternative strategies below.

    VM Live Migration Strategy: Post-copy

    Post-copy migrations work as follows:

    1. The target VM is created.
    2. The guest starts running on the target VM.
    3. The source starts sending chunks of VM state (mostly memory) to the target.
    4. When the guest, running on the target VM, accesses memory: if the page already exists on the target VM, the guest accesses it directly; otherwise, the target VM requests that chunk of memory from the source VM.
    5. Once all of the memory state is updated at the target VM, the source VM is removed.

    The main idea here is that the guest starts to run immediately on the target VM. This approach has advantages and disadvantages:

    Advantages:

    • The same memory chunk is never transferred twice. This is possible because, with post-copy, it does not matter that a page has been dirtied, since the guest is already running on the target VM.
    • This means that a high dirty rate has much less effect.
    • It consumes less network bandwidth.

    Disadvantages:

    • When using post-copy, the VM state has no single source of truth. When the guest (running on the target VM) writes to memory, that memory is one part of the guest's state, but other parts of it may still be updated only on the source VM. This situation is generally dangerous because, for example, if either the source or the target VM crashes, the state cannot be recovered.
    • Slow warm-up: when the guest starts executing, no memory is present on the target VM yet, so the guest has to fetch a large amount of memory over the network in a short period of time.
    • Slower than pre-copy in most cases.
    • Harder to cancel a migration.

    VM Live Migration Strategy: Auto-converge

    Auto-converge is a technique to help pre-copy migrations converge faster without changing the core algorithm of how the migration works.

    Since a high dirty rate is usually the most significant factor preventing a migration from converging, auto-converge simply throttles the guest's CPU. If the migration converges fast enough, the guest's CPU is not throttled, or only negligibly. If the migration does not converge fast enough, the CPU is throttled more and more as time goes on.

    This technique dramatically increases the probability of the migration converging eventually.

    Observe the VM Live Migration Progress and Result

    Migration Timeouts

    Depending on the type, the live migration process copies virtual machine memory pages and disk blocks to the destination. During this process, non-locked pages and blocks are copied and become free for the instance to use again. To achieve a successful migration, it is assumed that the instance will write to the free pages and blocks (pollute the pages) at a lower rate than they are being copied.

    Completion Time

    In some cases, the virtual machine can write to different memory pages/disk blocks at a higher rate than these can be copied, which prevents the migration process from completing in a reasonable amount of time. In this case, the live migration is aborted if it runs for too long. The timeout is calculated based on the size of the VMI: its memory and the ephemeral disks that need to be copied. The configurable parameter completionTimeoutPerGiB, which defaults to 800s, is the time to wait per GiB of data before the migration is aborted. For example, a VMI with 8 GiB of memory will time out after 6400 seconds.

    Progress Timeout

    A VM live migration is also aborted when copying memory does not make any progress. The time to wait for the live migration to make progress in transferring data is configurable via the progressTimeout parameter, which defaults to 150 seconds.

    VM Live Migration Configurations

    Changing Cluster Wide Migration Limits

    KubeVirt puts some limits in place so that migrations do not overwhelm the cluster. By default, only 5 migrations run in parallel, with an additional limit of a maximum of 2 outbound migrations per node. Finally, every migration is limited to a bandwidth of 64 MiB/s.

    You can change these values in the kubevirt CR:

    apiVersion: kubevirt.io/v1
    kind: KubeVirt
    metadata:
      name: kubevirt
      namespace: kubevirt
    spec:
      configuration:
        migrations:
          parallelMigrationsPerCluster: 5
          parallelOutboundMigrationsPerNode: 2
          bandwidthPerMigration: 64Mi
          completionTimeoutPerGiB: 800
          progressTimeout: 150
          disableTLS: false
          nodeDrainTaintKey: "kubevirt.io/drain"
          allowAutoConverge: false       # related to: Auto-converge
          allowPostCopy: false           # related to: Post-copy
          unsafeMigrationOverride: false
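
    To apply such a change on a live cluster, you can typically edit the existing KubeVirt CR in place rather than creating a new one. A sketch, assuming kubectl access to the cluster (in Harvester the CR usually lives in a system namespace, so locate it first):

    kubectl get kubevirt -A                        # find the CR name and namespace
    kubectl edit kubevirt <name> -n <namespace>    # adjust spec.configuration.migrations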

    Remember that most of these configurations can be overridden and fine-tuned for a specified group of VMs. For more information, refer to the Migration Policies section below.

    Migration Policies

    Migration policies provide a new way of applying migration configurations to virtual machines. The policies can refine the KubeVirt CR's MigrationConfiguration, which sets the cluster-wide migration configuration. This way, the cluster-wide settings serve as defaults that a migration policy can refine (i.e., change, remove, or add to).

    Remember that migration policies are at version v1alpha1. This means that this API is not fully stable yet and may change in the future.

    Migration Configurations

    Currently, the MigrationPolicy spec includes only the following configurations from the KubeVirt CR's MigrationConfiguration (in the future, more configurations that are not part of the KubeVirt CR will be added):

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      allowAutoConverge: true
      bandwidthPerMigration: 217Ki
      completionTimeoutPerGiB: 23
      allowPostCopy: false

    All the above fields are optional. When a field is omitted, the configuration defined in the KubeVirt CR's MigrationConfiguration applies. This way, the KubeVirt CR serves as a configurable set of defaults both for VMs that are not bound to any MigrationPolicy and for VMs that are bound to a MigrationPolicy that does not define all of the fields.

    Matching Policies to VMs

    Next in the spec are the selectors that define the group of VMs the policy applies to. The available options are the following.

    This policy applies to the VMs in namespaces that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        namespaceSelector:
          hpc-workloads: true # Matches a key and a value

    The policy below applies to the VMs that have all the required labels:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    spec:
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db # Matches a key and a value
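
    Putting the pieces together, a complete policy typically carries a name, the refined configuration values, and a selector in one object. The sketch below is illustrative only; the name, label, and values are placeholders rather than values taken from this article:

    apiVersion: migrations.kubevirt.io/v1alpha1
    kind: MigrationPolicy
    metadata:
      name: db-workloads-policy        # placeholder name
    spec:
      allowAutoConverge: true          # refines the cluster-wide default
      completionTimeoutPerGiB: 1600    # placeholder value
      selectors:
        virtualMachineInstanceSelector:
          workload-type: db            # applies to VMIs carrying this label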

    References

    Documents

    Libvirt Guest Migration

    Libvirt has a chapter describing the principles of VM/guest live migration.

    https://libvirt.org/migration.html

    Kubevirt Live Migration

    https://kubevirt.io/user-guide/operations/live_migration/

    Source Code

    The VM live migration configuration options are passed down to each layer correspondingly.

    Kubevirt

    https://github.com/kubevirt/kubevirt/blob/d425593ae392111dab80403ef0cde82625e37653/pkg/virt-launcher/virtwrap/live-migration-source.go#L103

    ...
    import "libvirt.org/go/libvirt"

    ...

    func generateMigrationFlags(isBlockMigration, migratePaused bool, options *cmdclient.MigrationOptions) libvirt.DomainMigrateFlags {
        ...
        if options.AllowAutoConverge {
            migrateFlags |= libvirt.MIGRATE_AUTO_CONVERGE
        }
        if options.AllowPostCopy {
            migrateFlags |= libvirt.MIGRATE_POSTCOPY
        }
        ...
    }

    Go Package Libvirt

    https://pkg.go.dev/libvirt.org/go/libvirt

    const (
        ...
        MIGRATE_AUTO_CONVERGE = DomainMigrateFlags(C.VIR_MIGRATE_AUTO_CONVERGE)
        MIGRATE_RDMA_PIN_ALL  = DomainMigrateFlags(C.VIR_MIGRATE_RDMA_PIN_ALL)
        MIGRATE_POSTCOPY      = DomainMigrateFlags(C.VIR_MIGRATE_POSTCOPY)
        ...
    )

    Libvirt

    https://github.com/libvirt/libvirt/blob/bfe53e9145cd5996a791c5caff0686572b850f82/include/libvirt/libvirt-domain.h#L1030

    /* Enable algorithms that ensure a live migration will eventually converge.
     * This usually means the domain will be slowed down to make sure it does
     * not change its memory faster than a hypervisor can transfer the changed
     * memory to the destination host. VIR_MIGRATE_PARAM_AUTO_CONVERGE_*
     * parameters can be used to tune the algorithm.
     *
     * Since: 1.2.3
     */
    VIR_MIGRATE_AUTO_CONVERGE = (1 << 13),
    ...
    /* Setting the VIR_MIGRATE_POSTCOPY flag tells libvirt to enable post-copy
     * migration. However, the migration will start normally and
     * virDomainMigrateStartPostCopy needs to be called to switch it into the
     * post-copy mode. See virDomainMigrateStartPostCopy for more details.
     *
     * Since: 1.3.3
     */
    VIR_MIGRATE_POSTCOPY = (1 << 15),
    - + \ No newline at end of file diff --git a/markdown-page/index.html b/markdown-page/index.html index 8a12aa36..6056de75 100644 --- a/markdown-page/index.html +++ b/markdown-page/index.html @@ -9,13 +9,13 @@ Markdown page example | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Markdown page example

    You don't need React to write simple standalone pages.

    - + \ No newline at end of file diff --git a/rancher-intergration/node-driver/index.html b/rancher-intergration/node-driver/index.html index edbb851e..57087f4a 100644 --- a/rancher-intergration/node-driver/index.html +++ b/rancher-intergration/node-driver/index.html @@ -9,13 +9,13 @@ Harvester Node Driver | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Harvester Node Driver

    The Harvester node driver is used to provision VMs in the Harvester cluster. In this section, you'll learn how to configure Rancher to use the Harvester node driver to launch and manage Kubernetes clusters.

    A node driver is the same as a Docker Machine driver, and the project repo is available at harvester/docker-machine-driver-harvester.

    Available as of v0.2.0

    Add Harvester Node Driver

    ISO Mode

    In ISO mode, the Harvester node driver is installed by default, and you do not need to add it manually.

    App Mode

    1. Navigate to the Rancher UI.
    2. From the Global view, choose Tools > Drivers in the navigation bar. From the Drivers page, select the Node Drivers tab. In versions before v2.2.0, you can select Node Drivers directly in the navigation bar.
    3. Click Add Node Driver.
    4. Enter the Download URL (docker-machine-driver-harvester) and the Custom UI URL (ui-driver-harvester).
    5. Add domains to the Whitelist Domains.
    6. Click Create.

    Create Cluster

    Now users can access the Rancher UI from Harvester, spin up Kubernetes clusters on top of the Harvester cluster, and manage them there.

    Prerequisite: A VLAN network is required for the Harvester node driver.

    1. From the Global view, click Add Cluster.
    2. Click Harvester.
    3. Select a Template.
    4. Fill out the rest of the form for creating a cluster.
    5. Click Create.

    See launching kubernetes and provisioning nodes in an infrastructure provider for more info.

    Create Node Template

    You can use the Harvester node driver to create node templates and eventually node pools for your Kubernetes cluster.

    1. Configure Account Access. For Harvester with embedded Rancher, you can choose Internal Harvester, which uses harvester.harvester-system as the default Host and 8443 as the default Port.
    2. Configure Instance Options
      • Configure the CPU, memory, disk, and disk bus.
      • Select an OS image that is compatible with the cloud-init config.
      • Select a network that the node driver is able to connect to, currently only VLAN is supported.
      • Enter the SSH User; this username will be used to SSH into the nodes. For example, the default user of the Ubuntu cloud image is ubuntu.
    3. Enter a RANCHER TEMPLATE name.

    See nodes hosted by an infrastructure provider for more info.

    - + \ No newline at end of file diff --git a/rancher-intergration/rancher-integration/index.html b/rancher-intergration/rancher-integration/index.html index 7e52c4be..e2a4e828 100644 --- a/rancher-intergration/rancher-integration/index.html +++ b/rancher-intergration/rancher-integration/index.html @@ -9,13 +9,13 @@ Rancher Integration | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Rancher Integration

    Available as of v0.2.0

    Rancher is an open source multi-cluster management platform. Harvester has integrated Rancher into its HCI mode installation by default.

    Enable Rancher Dashboard

    Users can enable the Rancher dashboard by going to the Harvester Settings page.

    1. Click the actions menu of the rancher-enabled setting.
    2. Select the Enable option and click the Save button.
    3. The Rancher dashboard button will appear in the top-right corner.
    4. Click the Rancher button, and a new tab will open and navigate to the Rancher dashboard.

    For more details about how to use Rancher, refer to this doc.

    Creating K8s Clusters using the Harvester Node Driver

    The Harvester node driver is used to provision VMs in the Harvester cluster; Rancher uses these VMs to launch and manage Kubernetes clusters.

    In ISO mode, the Harvester node driver is added by default. Refer to this doc for more details.

    - + \ No newline at end of file diff --git a/upgrade/index.html b/upgrade/index.html index 5457c5ec..1a11b061 100644 --- a/upgrade/index.html +++ b/upgrade/index.html @@ -9,7 +9,7 @@ Upgrading Harvester | The open-source hyperconverged infrastructure solution for a cloud-native world - + @@ -19,7 +19,7 @@ iso-mode-upgrade
  • Wait until the upgrade completes. The node will be rebooted and show Ready again in the terminal console.
  • Go to the Hosts page in Harvester UI.
  • Find the node that just completed the upgrade. Click on the action dropdown. Click on the Disable Maintenance Mode action.
  • For the remaining nodes of the cluster, repeat steps 2 to 9 to upgrade them one by one.
  • Upgrade in the UI

    Prerequisites: Internet access is required to perform a live upgrade in the UI.

    1. Go to the Dashboard page in Harvester UI.
    2. When a newer version is available, an Upgrade button is shown in the top-right corner. Click Upgrade.
    3. Select a version to upgrade. Click upgrade.
    4. Wait for the upgrade to complete. You can view the upgrade progress by clicking the circle icon in the top navigation bar.
    - + \ No newline at end of file diff --git a/vm-management/access-to-the-vm/index.html b/vm-management/access-to-the-vm/index.html index 901a4a84..981e1fb4 100644 --- a/vm-management/access-to-the-vm/index.html +++ b/vm-management/access-to-the-vm/index.html @@ -9,13 +9,13 @@ Access to the VM | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Access to the VM

    Once the VM is up and running, it can be accessed using either VNC or the serial console from the Harvester UI.

    Optionally, connect directly from your computer's SSH client.

    Access with the UI

    VMs can be accessed from the UI directly using either VNC or the serial console.

    If the VGA display is not enabled on the VM (e.g., when using the Ubuntu minimal cloud image), the VM can be accessed with the serial console.

    Access using SSH

    Use the address in a terminal emulation client (such as PuTTY), or use the following command line to access the VM directly from your computer's SSH client:

     ssh -i ~/.ssh/your-ssh-key user@<ip-address-or-hostname>

    - + \ No newline at end of file diff --git a/vm-management/backup-restore/index.html b/vm-management/backup-restore/index.html index b4f1e395..1b6ae993 100644 --- a/vm-management/backup-restore/index.html +++ b/vm-management/backup-restore/index.html @@ -9,13 +9,13 @@ VM Backup & Restore | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    VM Backup & Restore

    Available as of v0.2.0

    VM backups are created from the Virtual Machines page. The VM backup volumes will be stored in the Backup Target (an NFS or S3 server) and they can be used to either restore a new VM or replace an existing VM.

    Prerequisite: A backup target must be set up. For more information, see Backup Target Setup. If the BackupTarget has not been set, you’ll be presented with a prompt message.

    Backup Target Setup

    A backup target is an endpoint used to access a backup store in Harvester. A backup store is an NFS server or S3 compatible server that stores the backups of VM volumes. The backup target can be set at Settings > backup-target.

    | Parameter | Type | Description |
    | --- | --- | --- |
    | Type | string | Choose S3 or NFS |
    | Endpoint | string | A hostname or an IP address. Can be left empty for AWS S3. |
    | BucketName | string | Name of the bucket |
    | BucketRegion | string | Region of the bucket |
    | AccessKeyID | string | A user ID that uniquely identifies your account |
    | SecretAccessKey | string | The password to your account |
    | Certificate | string | Paste the certificate if you want to use a self-signed SSL certificate for your S3 server |
    | VirtualHostedStyle | bool | Use virtual-hosted-style access only, e.g., Alibaba Cloud (Aliyun) OSS |

    Create a VM backup

    1. Once the backup target is set, go to the Virtual Machines page.
    2. Click Take Backup of the VM actions to create a new VM backup.
    3. Set a custom backup name and click Create to create a new VM backup.

    Result: The backup is created. A notification message will be displayed, and users can go to the Advanced > Backups page to view all VM backups.

    The ReadyToUse status will be set to true once the Backup is complete.

    Users can either choose to restore a new VM or replace an existing VM using this backup.

    Restore a new VM using a backup

    To restore a new VM from a backup, follow these steps:

    1. Go to the Backups page.
    2. Specify the new VM name and click Create.
    3. A new VM will be restored using the backup volumes and metadata, and users can access it from the Virtual Machines page.

    Replace an Existing VM using a backup

    You can replace an existing VM using a backup that was taken from that same VM.

    You can choose to either delete the previous volumes or retain them. By default, all previous volumes are deleted.

    Requirements: The VM must exist and must be in the powered-off state.

    1. Go to the Backups page.
    2. Click Create.

    The restore process can be viewed from the Virtual Machines page.

    - + \ No newline at end of file diff --git a/vm-management/create-vm/index.html b/vm-management/create-vm/index.html index 1035866d..e4bff4cd 100644 --- a/vm-management/create-vm/index.html +++ b/vm-management/create-vm/index.html @@ -9,13 +9,13 @@ How to Create a VM | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    How to Create a VM

    Create one or more virtual machines from the Virtual Machines page.

    1. Choose the option to create either one or multiple VM instances.
    2. The VM name is required.
    3. (Optional) You can select a VM template. By default, ISO, raw, and Windows image templates are provided.
    4. Configure the CPU and Memory of the VM.
    5. Select a custom VM image.
    6. Select SSH keys or upload a new one.
    7. To add more disks to the VM, go to the Volumes tab. The default disk will be the root disk.
    8. To configure networks, go to the Networks tab. The Management Network is added by default. It is also possible to add secondary networks to the VMs using VLAN networks (configured on Advanced > Networks).
    9. Optional: Configure advanced options like hostname and cloud-init data in the Advanced Options section.

    Cloud config examples

    Config for the password of the default user:

    #cloud-config
    password: password
    chpasswd: { expire: False }
    ssh_pwauth: True
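
    If you prefer injecting a public key instead of a password, a cloud-config like the sketch below also works; the key string is a placeholder:

    #cloud-config
    ssh_authorized_keys:
      - ssh-rsa AAAAB3NzaC1yc2E... user@example   # placeholder public key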

    Network-data configuration using DHCP:

    version: 1
    config:
      - type: physical
        name: eth0
        subnets:
          - type: dhcp
      - type: physical
        name: eth1
        subnets:
          - type: dhcp
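
    For a static address instead of DHCP, the same version 1 network-data format can be used; the addresses below are illustrative only:

    version: 1
    config:
      - type: physical
        name: eth0
        subnets:
          - type: static
            address: 192.168.1.10/24      # placeholder address
            gateway: 192.168.1.1          # placeholder gateway
            dns_nameservers:
              - 8.8.8.8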

    You can also use the Cloud Config Template feature to include a pre-defined cloud-init config for the VM.

    Networks

    Management Network

    The management network represents the default eth0 interface in each VM, which is configured by the cluster network solution.

    By default, a VM can be accessed via the management network.

    Secondary Network

    It is also possible to connect VMs to additional networks using Harvester's built-in VLAN networks.

    - + \ No newline at end of file diff --git a/vm-management/live-migration/index.html b/vm-management/live-migration/index.html index 594aacfa..1d0db867 100644 --- a/vm-management/live-migration/index.html +++ b/vm-management/live-migration/index.html @@ -9,13 +9,13 @@ Live Migration | The open-source hyperconverged infrastructure solution for a cloud-native world - +

    Live Migration

    Live migration means moving a virtual machine to a different host without downtime.

    Notes:

    • Live migration is not allowed when the virtual machine is using a management network of bridge interface type.
    • To support live migration, 3 or more hosts in the Harvester cluster are required due to a known issue.

    Starting a migration

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that you want to migrate and select the vertical menu (⋮) > Migrate.
    3. Choose the node that you want to migrate the virtual machine to. Click Apply.

    Aborting a migration

    1. Go to the Virtual Machines page.
    2. Find the virtual machine that is in migrating status and whose migration you want to abort. Select the vertical menu (⋮) > Abort Migration.

    Migration timeouts

    Completion timeout

    The live migration process will copy virtual machine memory pages and disk blocks to the destination. In some cases, the virtual machine can write to different memory pages/disk blocks at a higher rate than these can be copied, which will prevent the migration process from being completed in a reasonable amount of time. Live migration will be aborted if it exceeds the completion timeout which is 800s per GiB of data. For example, a virtual machine with 8 GiB of memory will time out after 6400 seconds.

    Progress timeout

    Live migration will also be aborted when copying memory does not make any progress for 150 seconds.

    - + \ No newline at end of file