Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

neonvm-runner: arm support #1119

Merged
merged 7 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion neonvm-runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ COPY . .
# Build
RUN CGO_ENABLED=0 go build -o /runner neonvm-runner/cmd/*.go


FROM alpine:3.19

RUN apk add --no-cache \
Expand All @@ -21,12 +20,16 @@ RUN apk add --no-cache \
busybox-extras \
e2fsprogs \
qemu-system-x86_64 \
qemu-system-aarch64 \
mikhail-sakhnov marked this conversation as resolved.
Show resolved Hide resolved
qemu-img \
cgroup-tools \
openssh


COPY --from=builder /runner /usr/bin/runner
COPY neonvm-kernel/vmlinuz /vm/kernel/vmlinuz
COPY neonvm-runner/ssh_config /etc/ssh/ssh_config
# QEMU_EFI used only by runner running on the arm architecture
RUN wget https://releases.linaro.org/components/kernel/uefi-linaro/16.02/release/qemu64/QEMU_EFI.fd -O /vm/QEMU_EFI_ARM.fd

ENTRYPOINT ["/sbin/tini", "--", "runner"]
103 changes: 82 additions & 21 deletions neonvm-runner/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"os/signal"
"path/filepath"
"regexp"
"runtime"
"strings"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -52,8 +53,12 @@ import (
)

const (
QEMU_BIN = "qemu-system-x86_64"
QEMU_IMG_BIN = "qemu-img"
qemuBinArm64 = "qemu-system-aarch64"
qemuBinX8664 = "qemu-system-x86_64"
qemuImgBin = "qemu-img"

architectureArm64 = "arm64"
architectureAmd64 = "amd64"
defaultKernelPath = "/vm/kernel/vmlinuz"

rootDiskPath = "/vm/images/rootdisk.qcow2"
Expand Down Expand Up @@ -403,14 +408,14 @@ func calcDirUsage(dirPath string) (int64, error) {
func createSwap(diskPath string, swapSize *resource.Quantity) error {
tmpRawFile := "swap.raw"

if err := execFg(QEMU_IMG_BIN, "create", "-q", "-f", "raw", tmpRawFile, fmt.Sprintf("%d", swapSize.Value())); err != nil {
if err := execFg(qemuImgBin, "create", "-q", "-f", "raw", tmpRawFile, fmt.Sprintf("%d", swapSize.Value())); err != nil {
return err
}
if err := execFg("mkswap", "-L", swapName, tmpRawFile); err != nil {
return err
}

if err := execFg(QEMU_IMG_BIN, "convert", "-q", "-f", "raw", "-O", "qcow2", "-o", "cluster_size=2M,lazy_refcounts=on", tmpRawFile, diskPath); err != nil {
if err := execFg(qemuImgBin, "convert", "-q", "-f", "raw", "-O", "qcow2", "-o", "cluster_size=2M,lazy_refcounts=on", tmpRawFile, diskPath); err != nil {
return err
}

Expand Down Expand Up @@ -466,7 +471,7 @@ func createQCOW2(diskName string, diskPath string, diskSize *resource.Quantity,
return err
}

if err := execFg(QEMU_IMG_BIN, "convert", "-q", "-f", "raw", "-O", "qcow2", "-o", "cluster_size=2M,lazy_refcounts=on", "ext4.raw", diskPath); err != nil {
if err := execFg(qemuImgBin, "convert", "-q", "-f", "raw", "-O", "qcow2", "-o", "cluster_size=2M,lazy_refcounts=on", "ext4.raw", diskPath); err != nil {
return err
}

Expand Down Expand Up @@ -618,9 +623,14 @@ type Config struct {
appendKernelCmdline string
skipCgroupManagement bool
diskCacheSettings string
memoryProvider vmv1.MemoryProvider
autoMovableRatio string
cpuScalingMode vmv1.CpuScalingMode
// memoryProvider is a memory provider to use. Validated in newConfig.
memoryProvider vmv1.MemoryProvider
// autoMovableRatio value for VirtioMem provider. Validated in newConfig.
autoMovableRatio string
// cpuScalingMode is a mode to use for CPU scaling. Validated in newConfig.
cpuScalingMode vmv1.CpuScalingMode
// System CPU architecture. Set automatically equal to runtime.GOARCH.
architecture string
}

func newConfig(logger *zap.Logger) *Config {
Expand All @@ -631,9 +641,10 @@ func newConfig(logger *zap.Logger) *Config {
appendKernelCmdline: "",
skipCgroupManagement: false,
diskCacheSettings: "cache=none",
memoryProvider: "", // Require that this is explicitly set. We'll check later.
autoMovableRatio: "", // Require that this is explicitly set IFF memoryProvider is VirtioMem. We'll check later.
cpuScalingMode: "", // Require that this is explicitly set. We'll check later.
memoryProvider: "",
autoMovableRatio: "",
cpuScalingMode: "",
architecture: runtime.GOARCH,
mikhail-sakhnov marked this conversation as resolved.
Show resolved Hide resolved
}
flag.StringVar(&cfg.vmSpecDump, "vmspec", cfg.vmSpecDump,
"Base64 encoded VirtualMachine json specification")
Expand Down Expand Up @@ -868,7 +879,7 @@ func resizeRootDisk(logger *zap.Logger, vmSpec *vmv1.VirtualMachineSpec) error {
VirtualSize int64 `json:"virtual-size"`
}
// get current disk size by qemu-img info command
qemuImgOut, err := exec.Command(QEMU_IMG_BIN, "info", "--output=json", rootDiskPath).Output()
qemuImgOut, err := exec.Command(qemuImgBin, "info", "--output=json", rootDiskPath).Output()
if err != nil {
return fmt.Errorf("could not get root image size: %w", err)
}
Expand All @@ -882,7 +893,7 @@ func resizeRootDisk(logger *zap.Logger, vmSpec *vmv1.VirtualMachineSpec) error {
if !vmSpec.Guest.RootDisk.Size.IsZero() {
if vmSpec.Guest.RootDisk.Size.Cmp(*imageSizeQuantity) == 1 {
logger.Info(fmt.Sprintf("resizing rootDisk from %s to %s", imageSizeQuantity.String(), vmSpec.Guest.RootDisk.Size.String()))
if err := execFg(QEMU_IMG_BIN, "resize", rootDiskPath, fmt.Sprintf("%d", vmSpec.Guest.RootDisk.Size.Value())); err != nil {
if err := execFg(qemuImgBin, "resize", rootDiskPath, fmt.Sprintf("%d", vmSpec.Guest.RootDisk.Size.Value())); err != nil {
return fmt.Errorf("failed to resize rootDisk: %w", err)
}
} else {
Expand All @@ -904,14 +915,13 @@ func buildQEMUCmd(
// prepare qemu command line
qemuCmd := []string{
"-runas", "qemu",
"-machine", "q35",
"-machine", getMachineType(cfg.architecture),
"-nographic",
"-no-reboot",
"-nodefaults",
"-only-migratable",
"-audiodev", "none,id=noaudio",
"-serial", "pty",
"-serial", "stdio",
"-msg", "timestamp=on",
"-qmp", fmt.Sprintf("tcp:0.0.0.0:%d,server,wait=off", vmSpec.QMP),
"-qmp", fmt.Sprintf("tcp:0.0.0.0:%d,server,wait=off", vmSpec.QMPManual),
Expand Down Expand Up @@ -941,6 +951,21 @@ func buildQEMUCmd(
}
qemuCmd = append(qemuCmd, "-drive", fmt.Sprintf("id=%s,file=%s,if=virtio,media=disk,%s,discard=unmap", swapName, dPath, cfg.diskCacheSettings))
}
switch cfg.architecture {
case architectureArm64:
// add custom firmware to have ACPI working
qemuCmd = append(qemuCmd, "-bios", "/vm/QEMU_EFI_ARM.fd")
// arm virt has only one UART, setup virtio-serial to add more /dev/hvcX
qemuCmd = append(qemuCmd,
"-chardev", "stdio,id=virtio-console",
"-device", "virtconsole,chardev=virtio-console",
)
case architectureAmd64:
// on amd we have multiple UART ports so we can just use serial stdio
qemuCmd = append(qemuCmd, "-serial", "stdio")
default:
logger.Fatal("unsupported architecture", zap.String("architecture", cfg.architecture))
}

for _, disk := range vmSpec.Disks {
switch {
Expand Down Expand Up @@ -1057,7 +1082,7 @@ func buildQEMUCmd(
qemuCmd = append(
qemuCmd,
"-kernel", cfg.kernelPath,
"-append", makeKernelCmdline(cfg, vmSpec, vmStatus, hostname),
"-append", makeKernelCmdline(cfg, logger, vmSpec, vmStatus, hostname),
)

// should runner receive migration ?
Expand All @@ -1069,12 +1094,12 @@ func buildQEMUCmd(
}

const (
baseKernelCmdline = "panic=-1 init=/neonvm/bin/init console=ttyS1 loglevel=7 root=/dev/vda rw"
baseKernelCmdline = "panic=-1 init=/neonvm/bin/init loglevel=7 root=/dev/vda rw"
kernelCmdlineDIMMSlots = "memhp_default_state=online_movable"
kernelCmdlineVirtioMemTmpl = "memhp_default_state=online memory_hotplug.online_policy=auto-movable memory_hotplug.auto_movable_ratio=%s"
)

func makeKernelCmdline(cfg *Config, vmSpec *vmv1.VirtualMachineSpec, vmStatus *vmv1.VirtualMachineStatus, hostname string) string {
func makeKernelCmdline(cfg *Config, logger *zap.Logger, vmSpec *vmv1.VirtualMachineSpec, vmStatus *vmv1.VirtualMachineStatus, hostname string) string {
cmdlineParts := []string{baseKernelCmdline}

switch cfg.memoryProvider {
Expand Down Expand Up @@ -1103,6 +1128,18 @@ func makeKernelCmdline(cfg *Config, vmSpec *vmv1.VirtualMachineSpec, vmStatus *v
cmdlineParts = append(cmdlineParts, fmt.Sprintf("maxcpus=%d", vmSpec.Guest.CPUs.Min.RoundedUp()))
}

switch cfg.architecture {
case architectureArm64:
// explicitly enable acpi if we run on arm
cmdlineParts = append(cmdlineParts, "acpi=on")
// use virtio-serial device kernel console
cmdlineParts = append(cmdlineParts, "console=hvc0")
case architectureAmd64:
cmdlineParts = append(cmdlineParts, "console=ttyS1")
default:
logger.Fatal("unsupported architecture", zap.String("architecture", cfg.architecture))
}

return strings.Join(cmdlineParts, " ")
}

Expand Down Expand Up @@ -1149,7 +1186,6 @@ func runQEMU(

wg.Add(1)
go terminateQemuOnSigterm(ctx, logger, &wg)

var callbacks cpuServerCallbacks
// lastValue is used to store last fractional CPU request
// we need to store the value as is because we can't convert it back from MilliCPU
Expand Down Expand Up @@ -1181,13 +1217,14 @@ func runQEMU(
wg.Add(1)
go forwardLogs(ctx, logger, &wg)

qemuBin := getQemuBinaryName(cfg.architecture)
var bin string
var cmd []string
if !cfg.skipCgroupManagement {
bin = "cgexec"
cmd = append([]string{"-g", fmt.Sprintf("cpu:%s", cgroupPath), QEMU_BIN}, qemuCmd...)
cmd = append([]string{"-g", fmt.Sprintf("cpu:%s", cgroupPath), qemuBin}, qemuCmd...)
} else {
bin = QEMU_BIN
bin = qemuBin
cmd = qemuCmd
}

Expand All @@ -1207,6 +1244,30 @@ func runQEMU(
return err
}

// getQemuBinaryName returns the name of the QEMU system emulator binary
// that corresponds to the given CPU architecture.
//
// It panics when architecture is neither architectureArm64 nor
// architectureAmd64.
func getQemuBinaryName(architecture string) string {
	if architecture == architectureArm64 {
		return qemuBinArm64
	}
	if architecture == architectureAmd64 {
		return qemuBinX8664
	}
	// No emulator binary is known for any other architecture value.
	panic(fmt.Errorf("unknown architecture %s", architecture))
}

func getMachineType(architecture string) string {
mikhail-sakhnov marked this conversation as resolved.
Show resolved Hide resolved
switch architecture {
case architectureArm64:
// virt is the most up to date and generic ARM machine architecture
return "virt"
case architectureAmd64:
// q35 is the most up to date and generic x86_64 machine architecture
return "q35"
default:
panic(fmt.Errorf("unknown architecture %s", architecture))
}
}

func handleCPUChange(
logger *zap.Logger,
w http.ResponseWriter,
Expand Down
4 changes: 2 additions & 2 deletions pkg/neonvm/controllers/vm_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"fmt"
"os"
"reflect"
sysruntime "runtime"
Omrigan marked this conversation as resolved.
Show resolved Hide resolved
"strconv"
"time"

Expand Down Expand Up @@ -1213,7 +1214,6 @@ func affinityForVirtualMachine(vm *vmv1.VirtualMachine) *corev1.Affinity {
if a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{}
}

// if NodeSelectorTerms list is empty - add default values (arch==GOARCH of this controller binary or os==linux)
if len(a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 {
a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(
Expand All @@ -1223,7 +1223,7 @@ func affinityForVirtualMachine(vm *vmv1.VirtualMachine) *corev1.Affinity {
{
Key: "kubernetes.io/arch",
Operator: "In",
Values: []string{"amd64"},
mikhail-sakhnov marked this conversation as resolved.
Show resolved Hide resolved
Values: []string{sysruntime.GOARCH},
},
{
Key: "kubernetes.io/os",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ status:
status: "True"
cpus: 250m
memorySize: 1Gi
memoryProvider: DIMMSlots
memoryProvider: VirtioMem
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ spec:
min: 1
max: 5
use: 1
memoryProvider: DIMMSlots
memoryProvider: VirtioMem
sharnoff marked this conversation as resolved.
Show resolved Hide resolved
rootDisk:
image: vm-postgres:15-bullseye
size: 8Gi
Expand All @@ -56,7 +56,7 @@ spec:
- name: monitor
port: 10301
extraNetwork:
enable: true
enable: false
disks:
- name: pgdata
mountPath: /var/lib/postgresql
Expand Down
Loading