From b03db3e3c0e2708e009aa108500601f533ae7b20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Fri, 15 Dec 2023 20:52:14 +0000 Subject: [PATCH 01/36] runtime: Resolve high UVM memory footprint Bug: https://microsoft.visualstudio.com/OS/_workitems/edit/43668151 Rationale: This is a temporary solution for optimizing memory usage for the current mechanism of requesting resources through pod Limit annotations: - if no Limits are specified and hence WorkloadMemMB is 0, set a default value 'StaticWorkloadDefaultMem' to allocate a default amount of memory for use for containers in the sandbox in addition to the base memory - if Limits are specified, the base memory and the sum of Limits are allocated. The end user needs to be aware of the minimum memory requirements for their pods, otherwise the pod will be stuck in the ContainerCreating state Testing: Manual testing, creating pods with Limits and without limits, and with two containers where each container has a limit, tested with integration in a SPEC file where the config variables were set via environment variables via the make command Adapted by @mfrw from 3.1.0 to apply to 3.2.0 Signed-off-by: Muhammad Falak R Wani Signed-off-by: Manuel Huber runtime: Remove unused VMM options for mem alloc - We only ever tested these fork changes with CLH+MSHV - Remove these options as we don't use QEMU/FC Signed-off-by: Manuel Huber --- src/runtime/Makefile | 6 +++++- src/runtime/config/configuration-clh.toml.in | 5 +++++ src/runtime/pkg/katautils/config.go | 2 ++ src/runtime/pkg/oci/utils.go | 11 +++++++++++ src/runtime/virtcontainers/sandbox.go | 3 +++ 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 31c65627d3eb..5a3a8f9ee01b 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -205,7 +205,7 @@ DEFVCPUS := 1 # Default maximum number of vCPUs DEFMAXVCPUS := 0 # Default memory size in MiB -DEFMEMSZ := 2048 +DEFMEMSZ ?= 2048 # Default 
memory slots # Cases to consider : # - nvdimm rootfs image @@ -286,6 +286,9 @@ DEFSTATICRESOURCEMGMT ?= false DEFSTATICRESOURCEMGMT_TEE = true DEFSTATICRESOURCEMGMT_NV = true +# Default memory for use for workloads within the sandbox if no specific workload memory value is requested +DEFSTATICSANDBOXWORKLOADMEM ?= 2048 + DEFDISABLEIMAGENVDIMM ?= false DEFDISABLEIMAGENVDIMM_NV = true DEFDISABLEIMAGENVDIMM_CLH ?= true @@ -780,6 +783,7 @@ USER_VARS += DEFSTATICRESOURCEMGMT_FC USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFSTATICRESOURCEMGMT_NV +USER_VARS += DEFSTATICSANDBOXWORKLOADMEM USER_VARS += DEFBINDMOUNTS USER_VARS += DEFCREATECONTAINERTIMEOUT USER_VARS += DEFDANCONF diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 8718b56f3988..267c06b24fde 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -432,6 +432,11 @@ sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@ # - When running single containers using a tool like ctr, container sizing information will be available. static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH@ +# If set, the runtime will use the value as the default workload memory in MB for the sandbox when no workload memory request is passed +# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper +# default amount of memory available within the sandbox. +static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ + # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. 
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 29549c237852..44a5c865bee6 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -196,6 +196,7 @@ type runtime struct { Debug bool `toml:"enable_debug"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` + StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` EnablePprof bool `toml:"enable_pprof"` DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` CreateContainerTimeout uint64 `toml:"create_container_timeout"` @@ -1614,6 +1615,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat config.EnableVCPUsPinning = tomlConf.Runtime.EnableVCPUsPinning config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt + config.StaticSandboxWorkloadDefaultMem = tomlConf.Runtime.StaticSandboxWorkloadDefaultMem config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs config.EnablePprof = tomlConf.Runtime.EnablePprof diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 99d86515a9bc..9535d3deae0b 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -153,6 +153,9 @@ type RuntimeConfig struct { // any later resource updates. 
StaticSandboxResourceMgmt bool + // Memory to allocate for workloads within the sandbox when workload memory is unspecified + StaticSandboxWorkloadDefaultMem uint32 + // Determines if create a netns for hypervisor process DisableNewNetNs bool @@ -1196,6 +1199,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st StaticResourceMgmt: runtime.StaticSandboxResourceMgmt, + StaticWorkloadDefaultMem: runtime.StaticSandboxWorkloadDefaultMem, + ShmSize: shmSize, VfioMode: runtime.VfioMode, @@ -1226,6 +1231,12 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st // with the base number of CPU/memory (which is equal to the default CPU/memory specified for the runtime // configuration or annotations) as well as any specified workload resources. if sandboxConfig.StaticResourceMgmt { + // If no Limits are set in pod config, use StaticWorkloadDefaultMem to ensure the containers generally + // have a reasonable amount of memory available + if sandboxConfig.SandboxResources.WorkloadMemMB == 0 { + sandboxConfig.SandboxResources.WorkloadMemMB = sandboxConfig.StaticWorkloadDefaultMem + } + sandboxConfig.SandboxResources.BaseCPUs = sandboxConfig.HypervisorConfig.NumVCPUsF sandboxConfig.SandboxResources.BaseMemMB = sandboxConfig.HypervisorConfig.MemorySize diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 2b1d05b8f5f3..9e01461f9fd5 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -161,6 +161,9 @@ type SandboxConfig struct { HypervisorConfig HypervisorConfig + StaticWorkloadDefaultMem uint32 + + // Memory to allocate for workloads within the sandbox when workload memory is unspecified ShmSize uint64 SandboxResources SandboxResourceSizing From 14b1bfcc5f0e8e17d6f30c600d43f041d9b58d08 Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Tue, 5 Mar 2024 01:01:49 +0000 Subject: [PATCH 02/36] tools: Add initial igvm-builder and 
node-builder/azure-linux scripting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This branch starts introducing additional scripting to build, deploy and evaluate the components used in AKS' Pod Sandboxing and Confidential Containers preview features. This includes the capability to build the IGVM file and its reference measurement file for remote attestation. Signed-off-by: Manuel Huber tools: Improve igvm-builder and node-builder/azure-linux scripting - Support for Mariner 3 builds using OS_VERSION variable - Improvements to IGVM build process and flow as described in README - Adoption of using only cloud-hypervisor-cvm on CBL-Mariner Signed-off-by: Manuel Huber tools: Add package-tools-install functionality - Add script to install kata-containers(-cc)-tools bits - Minor improvements in README.md - Minor fix in package_install - Remove echo outputs in package_build Signed-off-by: Manuel Huber tools: Enable setting IGVM SVN - Allow setting SVN parameter for IGVM build scripting Signed-off-by: Manuel Huber node-builder: introduce BUILD_TYPE variable This lets developers build and deploy Kata in debug mode without having to make manual edits to the build scripts. With BUILD_TYPE=debug (default is release): * The agent is built in debug mode. * The agent is built with a permissive policy (using allow-all.rego). * The shim debug config file is used, ie. we create the symlink configuration-clh-snp-debug.toml <- configuration-clh-snp.toml. For example, building and deploying Kata-CC in debug mode is now as simple as: make BUILD_TYPE=debug all-confpods deploy-confpods Also do note that make still lets you override the other variables even after setting BUILD_TYPE. 
For example, you can use the production shim config with BUILD_TYPE=debug: make BUILD_TYPE=debug SHIM_USE_DEBUG_CONFIG=no all-confpods deploy-confpods Signed-off-by: Aurélien Bombo node-builder: introduce SHIM_REDEPLOY_CONFIG See README: when SHIM_REDEPLOY_CONFIG=no, the shim configuration is NOT redeployed, so that potential config changes made directly on the host during development aren't lost. Signed-off-by: Aurélien Bombo node-builder: Use img for Pod Sandboxing Switch from UVM initrd to image format Signed-off-by: Manuel Huber node-builder: Adapt README instructions - Sanitize containerd config snippet - Set podOverhead for Kata runtime class Signed-off-by: Manuel Huber tools: Adapt AGENT_POLICY_FILE path - Adapt path in uvm_build.sh script to comply with the upstream changes we pulled in Signed-off-by: Manuel Huber node-builder: Use Azure Linux 3 as default path - update recipe and node-builder scripting - change default value on rootfs-builder Signed-off-by: Manuel Huber node-builder: Deploy-only for AzL3 VMs - split deployment sections in node-builder README.md - install jq, curl dependencies within IGVM script - add path parameter to UVM install script Signed-off-by: Manuel Huber node-builder: Minor updates to README.md - no longer install make package, is part of meta package - remove superfluous popd - add note on permissive policy for ConfPods UVM builds Signed-off-by: Manuel Huber node-builder: Updates to README.md - with the latest 3.2.0.azl4 package on PMC, can remove OS_VERSION parameter and use the make deploy calls instead of copying files by hand for variant I (now aligned with Variant II) - with the latest changes on msft-main, set the podOverhead to 600Mi Signed-off-by: Manuel Huber node-builder: Fix SHIM_USE_DEBUG_CONFIG behavior Using a symlink would create a cycle after calling this script again when copying the final configuration at line 74 so we just use cp instead.
Also, I moved this block to the end of the file to properly override the final config file. Signed-off-by: Aurélien Bombo node-builder: Build and install debug configuration for pod sandboxing For ease of debugging, install a configuration-clh-debug.toml for pod sandboxing as we do in Conf pods. Signed-off-by: Cameron Baird runtime: remove clh-snp config file usage in makefile Not needed to build vanilla kata Signed-off-by: Saul Paredes package_tools_install.sh: include nsdax.gpl.c Include nsdax.gpl.c Signed-off-by: Saul Paredes node-builder: fix typo in string comparison This also fixes a shellcheck error and lets us require the shellcheck-required job: In ./tools/osbuilder/node-builder/azure-linux/uvm_build.sh line 34: if [ -z "${UVM_KERNEL_HEADER_DIR}}" ]; then ^-- SC2157 (error): Argument to -z is always false due to literal strings. Signed-off-by: Aurélien Bombo docs: node-builder: fix static check error This fixes the below static check error to follow up on the infra fix from kata-containers/kata-containers#11646: 2025-07-31T19:32:45.0031829Z time="2025-07-31T19:32:44.990004665Z" level=fatal msg="found 2 parse errors:\nfile=\"tools/osbuilder/node-builder/azure-linux/README.md\": duplicate heading: \"Set up environment\" (heading: {Name:Set up environment MDName:Set up environment LinkName:set-up-environment Level:2})\nfile=\"tools/osbuilder/node-builder/azure-linux/README.md\": duplicate heading: \"Install build dependencies\" (heading: {Name:Install build dependencies MDName:Install build dependencies LinkName:install-build-dependencies Level:2})" commit=1d17f56b1aa7a880468b8e25d14467c92dca8eeb name=kata-check-markdown pid=9075 source=check-markdown version=0.0.1 Note: that is likely flagged because having two headings with the same name, even under different sections, makes it impossible to create a canonical heading link in Markdown. This should eventually be squashed into the node-builder commit. 
Signed-off-by: Aurélien Bombo docs: node-builder: Remove references to moby-containerd-cc As we adopted containerd2, we remove references to our prior forked containerd version. Signed-off-by: Manuel Huber node-builder: 2Mb aligned guest image size Build the mariner guest image using IMAGE_SIZE_ALIGNMENT_MB=2. Signed-off-by: Dan Mihai to-squash: node-builder: add reference to README.md This is needed to avoid the following static-checks error: 2025-08-05T21:27:20.0028337Z [static-checks.sh:808] ERROR: Document tools/osbuilder/node-builder/azure-linux/README.md is not referenced This commit is to be squashed into the node-builder commit. Signed-off-by: Aurélien Bombo --- .gitignore | 20 ++ tools/osbuilder/.gitignore | 2 + tools/osbuilder/Makefile | 17 +- tools/osbuilder/README.md | 4 + .../igvm-builder/azure-linux/config.sh | 25 ++ .../igvm-builder/azure-linux/igvm_lib.sh | 70 +++++ tools/osbuilder/igvm-builder/igvm_builder.sh | 82 +++++ .../node-builder/azure-linux/Makefile | 77 +++++ .../node-builder/azure-linux/README.md | 294 ++++++++++++++++++ .../node-builder/azure-linux/clean.sh | 72 +++++ .../node-builder/azure-linux/common.sh | 66 ++++ .../node-builder/azure-linux/package_build.sh | 97 ++++++ .../azure-linux/package_install.sh | 73 +++++ .../azure-linux/package_tools_install.sh | 66 ++++ .../node-builder/azure-linux/uvm_build.sh | 76 +++++ .../node-builder/azure-linux/uvm_install.sh | 42 +++ 16 files changed, 1081 insertions(+), 2 deletions(-) create mode 100644 tools/osbuilder/igvm-builder/azure-linux/config.sh create mode 100644 tools/osbuilder/igvm-builder/azure-linux/igvm_lib.sh create mode 100755 tools/osbuilder/igvm-builder/igvm_builder.sh create mode 100644 tools/osbuilder/node-builder/azure-linux/Makefile create mode 100644 tools/osbuilder/node-builder/azure-linux/README.md create mode 100755 tools/osbuilder/node-builder/azure-linux/clean.sh create mode 100755 tools/osbuilder/node-builder/azure-linux/common.sh create mode 100755 
tools/osbuilder/node-builder/azure-linux/package_build.sh create mode 100755 tools/osbuilder/node-builder/azure-linux/package_install.sh create mode 100755 tools/osbuilder/node-builder/azure-linux/package_tools_install.sh create mode 100755 tools/osbuilder/node-builder/azure-linux/uvm_build.sh create mode 100755 tools/osbuilder/node-builder/azure-linux/uvm_install.sh diff --git a/.gitignore b/.gitignore index 252e072f2ee4..d7b2f49a6af1 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,23 @@ tools/packaging/static-build/agent/install_libseccomp.sh .direnv **/.DS_Store site/ + +# Microsoft-specific +.cargo/ +src/agent/samples/policy/test-input/ +src/tarfs/**/*.cmd +src/tarfs/**/*.ko +src/tarfs/**/*.mod +src/tarfs/**/*.mod.c +src/tarfs/**/*.o +src/tarfs/**/modules.order +src/tarfs/**/Module.symvers +src/tarfs-cvm/ +tools/osbuilder/kata-containers-igvm.img +tools/osbuilder/kata-containers-igvm-debug.img +tools/osbuilder/igvm-debug-measurement.cose +tools/osbuilder/igvm-measurement.cose +tools/osbuilder/root_hash.txt +tools/osbuilder/igvm.log +tools/osbuilder/kata-opa.service +tools/osbuilder/rootfs-builder/opa/ diff --git a/tools/osbuilder/.gitignore b/tools/osbuilder/.gitignore index dffa2ac94714..91a416e35e8e 100644 --- a/tools/osbuilder/.gitignore +++ b/tools/osbuilder/.gitignore @@ -10,3 +10,5 @@ kata-containers.img root_hash*.txt rootfs-builder/centos/RPM-GPG-KEY-* typescript +node-builder/azure-linux/agent-install +igvm-builder/igvm-tooling diff --git a/tools/osbuilder/Makefile b/tools/osbuilder/Makefile index 1b3aa4217a0a..b8383dbd28cb 100644 --- a/tools/osbuilder/Makefile +++ b/tools/osbuilder/Makefile @@ -8,6 +8,8 @@ TEST_RUNNER := $(MK_DIR)/tests/test_images.sh ROOTFS_BUILDER := $(MK_DIR)/rootfs-builder/rootfs.sh INITRD_BUILDER := $(MK_DIR)/initrd-builder/initrd_builder.sh IMAGE_BUILDER := $(MK_DIR)/image-builder/image_builder.sh +IGVM_BUILDER := $(MK_DIR)/igvm-builder/igvm_builder.sh +IGVM_SVN ?= 0 DISTRO ?= ubuntu BUILD_METHOD := distro @@ -16,11 +18,17 
@@ AGENT_INIT ?= no USE_DOCKER ?= true ROOTFS_BUILD_DEST := $(shell pwd) IMAGES_BUILD_DEST := $(shell pwd) +IGVM_BUILD_DEST := $(shell pwd) ROOTFS_MARKER_SUFFIX := _rootfs.done TARGET_ROOTFS := $(ROOTFS_BUILD_DEST)/$(DISTRO)_rootfs TARGET_ROOTFS_MARKER := $(ROOTFS_BUILD_DEST)/.$(DISTRO)$(ROOTFS_MARKER_SUFFIX) TARGET_IMAGE := $(IMAGES_BUILD_DEST)/kata-containers.img TARGET_INITRD := $(IMAGES_BUILD_DEST)/kata-containers-initrd.img +TARGET_IGVM := $(IGVM_BUILD_DEST)/kata-containers-igvm.img +TARGET_IGVM_MSMT := $(IGVM_BUILD_DEST)/igvm-measurement.cose +TARGET_IGVM_DEBUG := $(IGVM_BUILD_DEST)/kata-containers-igvm-debug.img +TARGET_IGVM_DEBUG_MSMT:= $(IGVM_BUILD_DEST)/igvm-debug-measurement.cose +TARGET_IGVM_LOG := $(IGVM_BUILD_DEST)/igvm.log VERSION_FILE := ./VERSION VERSION := $(shell grep -v ^\# $(VERSION_FILE) 2>/dev/null || echo "unknown") @@ -86,7 +94,7 @@ endif ################################################################################ .PHONY: all -all: image initrd +all: image initrd igvm rootfs-%: $(ROOTFS_BUILD_DEST)/.%$(ROOTFS_MARKER_SUFFIX) @ # DONT remove. This is not cancellation rule. 
@@ -156,6 +164,10 @@ $(DRACUT_OVERLAY_DIR): mkdir -p $@/etc/modules-load.d echo $(DRACUT_KMODULES) | tr " " "\n" > $@/etc/modules-load.d/kata-modules.conf +.PHONY: igvm +igvm: $(TARGET_IMAGE) + $(IGVM_BUILDER) -o $(IGVM_BUILD_DEST) -s $(IGVM_SVN) + .PHONY: test test: $(TEST_RUNNER) "$(DISTRO)" @@ -208,7 +220,8 @@ install-scripts: .PHONY: clean clean: - rm -rf $(TARGET_ROOTFS_MARKER) $(TARGET_ROOTFS) $(TARGET_IMAGE) $(TARGET_INITRD) $(DRACUT_OVERLAY_DIR) + rm -rf $(TARGET_ROOTFS_MARKER) $(TARGET_ROOTFS) $(TARGET_IMAGE) $(TARGET_INITRD) $(DRACUT_OVERLAY_DIR) $(TARGET_IGVM) $(TARGET_IGVM_DEBUG) $(TARGET_IGVM_MSMT) $(TARGET_IGVM_DEBUG_MSMT) $(TARGET_IGVM_LOG) + rm -rf $(IGVM_TOOL_SRC) # Prints the name of the variable passed as suffix to the print- target, # E.g., if Makefile contains: diff --git a/tools/osbuilder/README.md b/tools/osbuilder/README.md index ea05162c9f29..f8fe104508a1 100644 --- a/tools/osbuilder/README.md +++ b/tools/osbuilder/README.md @@ -83,6 +83,10 @@ the image. Ubuntu is the default distro for building the rootfs, to use a different one, you can set `DISTRO=alpine|debian|ubuntu|cbl-mariner`. For example `make USE_DOCKER=true DISTRO=alpine rootfs` will make an Alpine rootfs using Docker. +### Azure Linux host node deployment + +See [the node builder documentation](node-builder/azure-linux/README.md). + ### Rootfs creation This section shows how to build a basic rootfs using the default distribution. 
diff --git a/tools/osbuilder/igvm-builder/azure-linux/config.sh b/tools/osbuilder/igvm-builder/azure-linux/config.sh new file mode 100644 index 000000000000..ade604dd6046 --- /dev/null +++ b/tools/osbuilder/igvm-builder/azure-linux/config.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# this is where the kernel-uvm package installation places bzImage, see SPEC file +BZIMAGE_BIN="/usr/share/cloud-hypervisor/bzImage" + +IGVM_EXTRACT_FOLDER="${SCRIPT_DIR}/igvm-tooling" +CLH_ACPI_TABLES_DIR="${IGVM_EXTRACT_FOLDER}/src/igvm/acpi/acpi-clh/" +IGVM_PY_FILE="${IGVM_EXTRACT_FOLDER}/src/igvm/igvmgen.py" + +IGVM_BUILD_VARS="-kernel ${BZIMAGE_BIN} -boot_mode x64 -vtl 0 -svme 1 -encrypted_page 1 -pvalidate_opt 1 -acpi ${CLH_ACPI_TABLES_DIR}" + +IGVM_KERNEL_PARAMS_COMMON="dm-mod.create=\"dm-verity,,,ro,0 ${IMAGE_DATA_SECTORS} verity 1 /dev/vda1 /dev/vda2 ${IMAGE_DATA_BLOCK_SIZE} ${IMAGE_HASH_BLOCK_SIZE} ${IMAGE_DATA_BLOCKS} 0 sha256 ${IMAGE_ROOT_HASH} ${IMAGE_SALT}\" \ + root=/dev/dm-0 rootflags=data=ordered,errors=remount-ro ro rootfstype=ext4 panic=1 no_timer_check noreplace-smp systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service \ + systemd.mask=systemd-networkd.socket agent.enable_signature_verification=false" +IGVM_KERNEL_PROD_PARAMS="${IGVM_KERNEL_PARAMS_COMMON} quiet" +IGVM_KERNEL_DEBUG_PARAMS="${IGVM_KERNEL_PARAMS_COMMON} console=hvc0 systemd.log_target=console agent.log=debug agent.debug_console agent.debug_console_vport=1026" + +IGVM_FILE_NAME="kata-containers-igvm.img" +IGVM_DBG_FILE_NAME="kata-containers-igvm-debug.img" +IGVM_MEASUREMENT_FILE_NAME="igvm-measurement.cose" +IGVM_DBG_MEASUREMENT_FILE_NAME="igvm-debug-measurement.cose" diff --git a/tools/osbuilder/igvm-builder/azure-linux/igvm_lib.sh b/tools/osbuilder/igvm-builder/azure-linux/igvm_lib.sh new file mode 100644 index 000000000000..e5b13307445f --- /dev/null +++ 
b/tools/osbuilder/igvm-builder/azure-linux/igvm_lib.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +install_igvm_tool() +{ + echo "Installing IGVM tool" + if [ -d ${IGVM_EXTRACT_FOLDER} ]; then + echo "${IGVM_EXTRACT_FOLDER} folder already exists, assuming tool is already installed" + return + fi + + # the igvm tool on Azure Linux will soon be properly installed through dnf via kata-packages-uvm-build + # as of now, even when installing with pip3, we cannot delete the source folder as the ACPI tables are not being installed anywhere, hence relying on this folder + echo "Determining and downloading latest IGVM tooling release, and extracting including ACPI tables" + IGVM_VER=$(curl -sL "https://api.github.com/repos/microsoft/igvm-tooling/releases/latest" | jq -r .tag_name | sed 's/^v//') + curl -sL "https://github.com/microsoft/igvm-tooling/archive/refs/tags/${IGVM_VER}.tar.gz" | tar --no-same-owner -xz + mv igvm-tooling-${IGVM_VER} ${IGVM_EXTRACT_FOLDER} + + echo "Installing IGVM module msigvm (${IGVM_VER}) via pip3" + pushd ${IGVM_EXTRACT_FOLDER}/src + pip3 install --no-deps ./ + popd +} + +uninstall_igvm_tool() +{ + echo "Uninstalling IGVM tool" + + rm -rf ${IGVM_EXTRACT_FOLDER} + pip3 uninstall -y msigvm +} + +build_igvm_files() +{ + echo "Reading Kata image dm_verity root hash information from root_hash file" + ROOT_HASH_FILE="${SCRIPT_DIR}/../root_hash.txt" + + if [ ! 
-f "${ROOT_HASH_FILE}" ]; then + echo "Could no find image root hash file '${ROOT_HASH_FILE}', aborting" + exit 1 + fi + + IMAGE_ROOT_HASH=$(sed -e 's/Root hash:\s*//g;t;d' "${ROOT_HASH_FILE}") + IMAGE_SALT=$(sed -e 's/Salt:\s*//g;t;d' "${ROOT_HASH_FILE}") + IMAGE_DATA_BLOCKS=$(sed -e 's/Data blocks:\s*//g;t;d' "${ROOT_HASH_FILE}") + IMAGE_DATA_BLOCK_SIZE=$(sed -e 's/Data block size:\s*//g;t;d' "${ROOT_HASH_FILE}") + IMAGE_DATA_SECTORS_PER_BLOCK=$((IMAGE_DATA_BLOCK_SIZE / 512)) + IMAGE_DATA_SECTORS=$((IMAGE_DATA_BLOCKS * IMAGE_DATA_SECTORS_PER_BLOCK)) + IMAGE_HASH_BLOCK_SIZE=$(sed -e 's/Hash block size:\s*//g;t;d' "${ROOT_HASH_FILE}") + + # reloading the config file as various variables depend on above values + load_config_distro + + echo "Building (debug) IGVM files and creating their reference measurement files" + # we could call into the installed binary '~/.local/bin/igvmgen' when adding to PATH or, better, into 'python3 -m msigvm' + # however, as we still need the installation directory for the ACPI tables, we leave things as is for now + # at the same time we seem to need to call pip3 install for invoking the tool at all + python3 ${IGVM_PY_FILE} $IGVM_BUILD_VARS -o $IGVM_FILE_NAME -measurement_file $IGVM_MEASUREMENT_FILE_NAME -append "$IGVM_KERNEL_PROD_PARAMS" -svn $SVN + python3 ${IGVM_PY_FILE} $IGVM_BUILD_VARS -o $IGVM_DBG_FILE_NAME -measurement_file $IGVM_DBG_MEASUREMENT_FILE_NAME -append "$IGVM_KERNEL_DEBUG_PARAMS" -svn $SVN + + if [ "${PWD}" -ef "$(readlink -f $OUT_DIR)" ]; then + echo "OUT_DIR matches with current dir, not moving build artifacts" + else + echo "Moving build artifacts to ${OUT_DIR}" + mv $IGVM_FILE_NAME $IGVM_DBG_FILE_NAME $IGVM_MEASUREMENT_FILE_NAME $IGVM_DBG_MEASUREMENT_FILE_NAME $OUT_DIR + fi +} diff --git a/tools/osbuilder/igvm-builder/igvm_builder.sh b/tools/osbuilder/igvm-builder/igvm_builder.sh new file mode 100755 index 000000000000..8e539f69d941 --- /dev/null +++ b/tools/osbuilder/igvm-builder/igvm_builder.sh @@ -0,0 +1,82 @@ 
+#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o pipefail +set -o errtrace + +[ -n "$DEBUG" ] && set -x + +SCRIPT_DIR="$(dirname $(readlink -f $0))" + +# distro-specific config file +typeset -r CONFIG_SH="config.sh" + +# Name of an optional distro-specific file which, if it exists, must implement the +# install_igvm_tool, build_igvm_files, and uninstall_igvm_tool functions. +typeset -r LIB_SH="igvm_lib.sh" + +load_config_distro() +{ + distro_config_dir="${SCRIPT_DIR}/${DISTRO}" + + [ -d "${distro_config_dir}" ] || die "Could not find configuration directory '${distro_config_dir}'" + + if [ -e "${distro_config_dir}/${LIB_SH}" ]; then + igvm_lib="${distro_config_dir}/${LIB_SH}" + echo "igvm_lib.sh file found. Loading content" + source "${igvm_lib}" + fi + + # Source config.sh from distro, depends on root_hash based variables here + igvm_config="${distro_config_dir}/${CONFIG_SH}" + source "${igvm_config}" +} + +DISTRO="azure-linux" +MODE="build" + +while getopts ":o:s:iu" OPTIONS; do + case "${OPTIONS}" in + o ) OUT_DIR=$OPTARG ;; + s ) SVN=$OPTARG ;; + i ) MODE="install" ;; + u ) MODE="uninstall" ;; + \? 
) + echo "Error - Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Error - Invalid Option: -$OPTARG requires an argument" 1>&2 + exit 1 + ;; + esac +done + +echo "IGVM builder script" +echo "-- OUT_DIR -> $OUT_DIR" +echo "-- SVN -> $SVN" +echo "-- DISTRO -> $DISTRO" +echo "-- MODE -> $MODE" + +if [ -n "$DISTRO" ]; then + load_config_distro +else + echo "DISTRO must be specified" + exit 1 +fi + +case "$MODE" in + "install") + install_igvm_tool + ;; + "uninstall") + uninstall_igvm_tool + ;; + "build") + build_igvm_files + ;; +esac diff --git a/tools/osbuilder/node-builder/azure-linux/Makefile b/tools/osbuilder/node-builder/azure-linux/Makefile new file mode 100644 index 000000000000..85ebf59e2114 --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/Makefile @@ -0,0 +1,77 @@ +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +BUILD_TYPE := release + +export SHIM_REDEPLOY_CONFIG := yes + +ifeq ($(BUILD_TYPE),debug) + export AGENT_BUILD_TYPE := debug + export AGENT_POLICY_FILE := allow-all.rego + export SHIM_USE_DEBUG_CONFIG := yes +else + export AGENT_BUILD_TYPE := release + export AGENT_POLICY_FILE := allow-set-policy.rego + export SHIM_USE_DEBUG_CONFIG := no +endif + +.PHONY: all +all: package uvm + +.PHONY: all-confpods +all-confpods: package-confpods uvm-confpods + +.PHONY: package +package: + ./package_build.sh + +.PHONY: package-confpods +package-confpods: + CONF_PODS=yes ./package_build.sh + +.PHONY: uvm +uvm: + ./uvm_build.sh + +.PHONY: uvm-confpods +uvm-confpods: + CONF_PODS=yes ./uvm_build.sh + +.PHONY: clean +clean: + ./clean.sh + +.PHONY: clean-confpods +clean-confpods: + CONF_PODS=yes ./clean.sh + +.PHONY: deploy +deploy: deploy-package deploy-uvm + +.PHONY: deploy-package +deploy-package: + ./package_install.sh + +.PHONY: deploy-package-tools +deploy-package-tools: + ./package_tools_install.sh + +.PHONY: deploy-uvm +deploy-uvm: + ./uvm_install.sh + +.PHONY: deploy-confpods +deploy-confpods: 
deploy-confpods-package deploy-confpods-uvm + +.PHONY: deploy-confpods-package +deploy-confpods-package: + CONF_PODS=yes ./package_install.sh + +.PHONY: deploy-confpods-package-tools +deploy-confpods-package-tools: + CONF_PODS=yes ./package_tools_install.sh + +.PHONY: deploy-confpods-uvm +deploy-confpods-uvm: + CONF_PODS=yes ./uvm_install.sh diff --git a/tools/osbuilder/node-builder/azure-linux/README.md b/tools/osbuilder/node-builder/azure-linux/README.md new file mode 100644 index 000000000000..9d143d207791 --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/README.md @@ -0,0 +1,294 @@ +# Overview + +This guide serves as a reference on how to build and install the underlying software stack for *Pod Sandboxing with AKS* and for *Confidential Containers on AKS* using Azure Linux. +This enables running Kata (Confidential) Containers via the OCI interface, or via a local kubelet, or leveraging AKS' Kubernetes solution. + +In the following, the terms *Kata* and *Kata-CC* refer to *Pod Sandboxing with AKS* and *Confidential Containers on AKS*, respectively. +The term *building* refers to building the components from source, whereas the term *installing* refers to utilizing components released by the Azure Linux team for straightforward evaluation. + +The guide provides the steps for two different environments: +- Azure Linux 3 based systems, such as Azure VMs + - Variant I: Utilize released components + - Variant II: Build components from source +- AKS nodes (based on Azure Linux 2 as of today) + +# Steps for Azure Linux 3 based environments + +## Set up AzL3 environment + +While the build can happen in any Azure Linux 3 based environment, the stack can only be evaluated on environments with proper virtualization support and, for Kata-CC, on top of AMD SEV-SNP.
Examples of such environments are Azure Linux 3 based Azure VMs using a proper SKU: +- Deploy an Azure Linux 3 VM via `az vm create` using a [CC vm size SKU](https://learn.microsoft.com/en-us/azure/virtual-machines/dcasccv5-dcadsccv5-series) + - Example: `az vm create --resource-group --name --os-disk-size-gb --public-ip-sku Standard --size --admin-username azureuser --ssh-key-values --image ` +- SSH onto the VM + +Not validated for evaluation: Install [Azure Linux 3](https://github.com/microsoft/azurelinux) on a bare metal machine supporting AMD SEV-SNP. + +To merely build the stack, we refer to the official [Azure Linux GitHub page](https://github.com/microsoft/azurelinux) to set up an Azure Linux 3 environment. + +## Deploy required host packages (incl. VMM, SEV-SNP capable kernel and Microsoft Hypervisor) and extend containerd configuration + +Install relevant packages, append a configuration snippet to `/etc/containerd/config.toml` to register the Kata(-CC) handlers, then reboot the system: +``` +sudo dnf -y makecache +sudo dnf -y install kata-packages-host + +sudo tee -a /etc/containerd/config.toml 2>&1 <` + - For build and deployment of both Kata and Kata-CC artifacts, first run the `make all` and `make deploy` commands to build and install the Kata Containers for AKS components followed by `make clean`, and then run `make all-confpods` and `make deploy-confpods` to build and install the Confidential Containers for AKS components - or vice versa (using `make clean-confpods`). + +## Debug builds + +This section describes how to build and deploy in debug mode. + +`make all-confpods` takes the following variables: + + * `AGENT_BUILD_TYPE`: Specify `release` (default) to build the agent in + release mode, or `debug` to build it in debug mode. + * `AGENT_POLICY_FILE`: Specify `allow-set-policy.rego` (default) to use + a restrictive policy, or `allow-all.rego` to use a permissive policy.
+ +`make deploy-confpods` takes the following variable: + + * `SHIM_USE_DEBUG_CONFIG`: Specify `no` (default) to use the production + configuration, or `yes` to use the debug configuration (all debug + logging enabled). In this case you'll want to enable debug logging + in containerd as well. Note that this variable has no effect if + `SHIM_REDEPLOY_CONFIG=no`. + +In general, you can specify the debug configuration for all the above +variables by using `BUILD_TYPE=debug` as such: + +```shell +sudo make BUILD_TYPE=debug all-confpods deploy-confpods +``` + +Also note that make still lets you override the other variables even +after setting `BUILD_TYPE`. For example, you can use the production shim +config with `BUILD_TYPE=debug`: + +```shell +sudo make BUILD_TYPE=debug SHIM_USE_DEBUG_CONFIG=no all-confpods deploy-confpods +``` + +### Prevent redeploying the shim configuration + +If you're manually modifying the shim configuration directly on the host +during development and you don't want to redeploy and overwrite that +file each time you redeploy binaries, you can separately specify the +`SHIM_REDEPLOY_CONFIG` (default `yes`): + +```shell +sudo make SHIM_REDEPLOY_CONFIG=no all-confpods deploy-confpods +``` + +Note that this variable is independent from the other variables +mentioned above. So if you want to avoid redeploying the shim +configuration AND build in debug mode, you have to use the following +command: + +```shell +sudo make BUILD_TYPE=debug SHIM_REDEPLOY_CONFIG=no all-confpods deploy-confpods +``` + +# Run Kata (Confidential) Containers + +## Run via CRI or via containerd API + +Use e.g. `crictl` (or `ctr`) to schedule Kata (Confidential) containers, referencing either the Kata or Kata-CC handlers. + +Note: On Kubernetes nodes, pods created via `crictl` will be deleted by the control plane. 
+ +The following instructions serve as a general reference: +- Install `crictl`, `cni` binaries, and set runtime endpoint in `crictl` configuration: + + ``` + sudo dnf -y install cri-tools cni + sudo crictl config --set runtime-endpoint=unix:///run/containerd/containerd.sock + ``` + +- Set a proper CNI configuration and create a sample pod manifest: This step is omitted as it depends on the individual needs. + +- Run pods with `crictl`, for example: + + `sudo crictl runp -T 30s -r ` + +- Run containers with `ctr`, for example a confidential container: + + `sudo ctr -n=k8s.io image pull --snapshotter=tardev docker.io/library/busybox:latest` + + `sudo ctr -n=k8s.io run --cni --runtime io.containerd.run.kata-cc.v2 --runtime-config-path /opt/confidential-containers/share/defaults/kata-containers/configuration-clh-snp.toml --snapshotter tardev -t --rm docker.io/library/busybox:latest hello sh` + +For further usage we refer to the upstream `crictl` (or `ctr`) and CNI documentation. + +## Run via Kubernetes + +If your environment was set up through `az aks create` the respective node is ready to run Kata (Confidential) Containers as AKS Kubernetes pods. +Other types of Kubernetes clusters should work as well. 
While this document doesn't cover how to set up those clusters, you can +apply the kata and kata-cc runtime classes to your cluster from the machine that holds your kubeconfig file, for example: +``` +cat << EOF > runtimeClass-kata-cc.yaml +kind: RuntimeClass +apiVersion: node.k8s.io/v1 +metadata: + name: kata-cc +handler: kata-cc +overhead: + podFixed: + memory: "600Mi" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" +EOF + +cat << EOF > runtimeClass-kata.yaml +kind: RuntimeClass +apiVersion: node.k8s.io/v1 +metadata: + name: kata +handler: kata +overhead: + podFixed: + memory: "600Mi" +scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" +EOF + +kubectl apply -f runtimeClass-kata-cc.yaml -f runtimeClass-kata.yaml +``` + +And label your node appropriately: +``` +kubectl label node katacontainers.io/kata-runtime=true +``` + +# Build attestation scenarios +The build artifacts for the UVM ConfPods target include an IGVM file and a so-called reference measurement file (unsigned). The IGVM file is loaded into memory and measured by the AMD SEV-SNP PSP (when a Confidential Container is started). With this and with the Kata security policy feature, attestation scenarios can be built: the reference measurement (often referred to as 'endorsement') can, for example, be signed by a trusted party (such as Microsoft in Confidential Containers on AKS) and be compared with the actual measurement part of the attestation report. The latter can be retrieved through respective system calls inside the Kata Confidential Containers Guest VM. + +An example of an attestation scenario through Microsoft Azure Attestation is presented in [Attestation in Confidential containers on Azure Container Instances](https://learn.microsoft.com/en-us/azure/container-instances/confidential-containers-attestation-concepts).
+Documentation for leveraging the Kata security policy feature can be found in [Security policy for Confidential Containers on Azure Kubernetes Service](https://learn.microsoft.com/en-us/azure/confidential-computing/confidential-containers-aks-security-policy). diff --git a/tools/osbuilder/node-builder/azure-linux/clean.sh b/tools/osbuilder/node-builder/azure-linux/clean.sh new file mode 100755 index 000000000000..11cf6fb03bd5 --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/clean.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o pipefail +set -o errtrace + +[ -n "$DEBUG" ] && set -x + +script_dir="$(dirname $(readlink -f $0))" +repo_dir="${script_dir}/../../../../" + +common_file="common.sh" +source "${common_file}" + +pushd "${repo_dir}" + +echo "Clean debug shim config" +pushd src/runtime/config/ +rm -f "${SHIM_DBG_CONFIG_FILE_NAME}" +popd + +echo "Clean runtime build" +pushd src/runtime/ +make clean SKIP_GO_VERSION_CHECK=1 +popd + +echo "Clean agent build" +pushd src/agent/ +make clean +popd + +rm -rf ${AGENT_INSTALL_DIR} + +echo "Clean UVM build" +pushd tools/osbuilder/ +sudo -E PATH=$PATH make DISTRO=cbl-mariner clean +popd + +echo "Clean IGVM tool installation" + + +if [ "${CONF_PODS}" == "yes" ]; then + + echo "Clean tardev-snapshotter tarfs driver build" + pushd src/tarfs + set_uvm_kernel_vars + if [ -n "${UVM_KERNEL_HEADER_DIR}" ]; then + make clean KDIR=${UVM_KERNEL_HEADER_DIR} + fi + popd + + echo "Clean utarfs binary build" + pushd src/utarfs/ + make clean + popd + + echo "Clean tardev-snapshotter overlay binary build" + pushd src/overlay/ + make clean + popd + + echo "Clean tardev-snapshotter service build" + pushd src/tardev-snapshotter/ + make clean + popd +fi + +popd diff --git a/tools/osbuilder/node-builder/azure-linux/common.sh b/tools/osbuilder/node-builder/azure-linux/common.sh new file mode 100755 index 
000000000000..8b0665c47aa2 --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/common.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +script_dir="$(dirname $(readlink -f $0))" +lib_file="${script_dir}/../../scripts/lib.sh" +source "${lib_file}" + +OS_VERSION=$(sort -r /etc/*-release | gawk 'match($0, /^(VERSION_ID=(.*))$/, a) { print toupper(a[2] a[3]); exit }' | tr -d '"') + +([[ "${OS_VERSION}" == "2.0" ]] || [[ "${OS_VERSION}" == "3.0" ]]) || die "OS_VERSION: value '${OS_VERSION}' must equal 3.0 (default) or 2.0" + +if [ "${CONF_PODS}" == "yes" ]; then + INSTALL_PATH_PREFIX="/opt/confidential-containers" + UVM_TOOLS_PATH_OSB="${INSTALL_PATH_PREFIX}/uvm/tools/osbuilder" + UVM_TOOLS_PATH_SRC="${INSTALL_PATH_PREFIX}/uvm/src" + UVM_PATH_DEFAULT="${INSTALL_PATH_PREFIX}/share/kata-containers" + IMG_FILE_NAME="kata-containers.img" + IGVM_FILE_NAME="kata-containers-igvm.img" + IGVM_DBG_FILE_NAME="kata-containers-igvm-debug.img" + UVM_MEASUREMENT_FILE_NAME="igvm-measurement.cose" + UVM_DBG_MEASUREMENT_FILE_NAME="igvm-debug-measurement.cose" + SHIM_CONFIG_PATH="${INSTALL_PATH_PREFIX}/share/defaults/kata-containers" + SHIM_CONFIG_FILE_NAME="configuration-clh-snp.toml" + SHIM_CONFIG_INST_FILE_NAME="${SHIM_CONFIG_FILE_NAME}" + SHIM_DBG_CONFIG_FILE_NAME="configuration-clh-snp-debug.toml" + SHIM_DBG_CONFIG_INST_FILE_NAME="${SHIM_DBG_CONFIG_FILE_NAME}" + DEBUGGING_BINARIES_PATH="${INSTALL_PATH_PREFIX}/bin" + SHIM_BINARIES_PATH="/usr/local/bin" + SHIM_BINARY_NAME="containerd-shim-kata-cc-v2" +else + INSTALL_PATH_PREFIX="/usr" + UVM_TOOLS_PATH_OSB="/opt/kata-containers/uvm/tools/osbuilder" + UVM_TOOLS_PATH_SRC="/opt/kata-containers/uvm/src" + UVM_PATH_DEFAULT="${INSTALL_PATH_PREFIX}/share/kata-containers" + IMG_FILE_NAME="kata-containers.img" + SHIM_CONFIG_PATH="${INSTALL_PATH_PREFIX}/share/defaults/kata-containers" + SHIM_CONFIG_FILE_NAME="configuration-clh.toml" + 
SHIM_CONFIG_INST_FILE_NAME="configuration.toml" + SHIM_DBG_CONFIG_FILE_NAME="configuration-clh-debug.toml" + SHIM_DBG_CONFIG_INST_FILE_NAME="${SHIM_DBG_CONFIG_FILE_NAME}" + DEBUGGING_BINARIES_PATH="${INSTALL_PATH_PREFIX}/local/bin" + SHIM_BINARIES_PATH="${INSTALL_PATH_PREFIX}/local/bin" + SHIM_BINARY_NAME="containerd-shim-kata-v2" +fi + +# this is where cloud-hypervisor-cvm gets installed (see package SPEC) +CLOUD_HYPERVISOR_LOCATION="/usr/bin/cloud-hypervisor" +# this is where kernel-uvm gets installed (see package SPEC) +KERNEL_BINARY_LOCATION="/usr/share/cloud-hypervisor/vmlinux.bin" +# Mariner 3: different binary name +if [ "${OS_VERSION}" == "2.0" ]; then + VIRTIOFSD_BINARY_LOCATION="/usr/libexec/virtiofsd-rs" +else + VIRTIOFSD_BINARY_LOCATION="/usr/libexec/virtiofsd" +fi + +AGENT_INSTALL_DIR="${script_dir}/agent-install" + +set_uvm_kernel_vars() { + UVM_KERNEL_VERSION=$(rpm -q --queryformat '%{VERSION}' kernel-uvm-devel) + UVM_KERNEL_RELEASE=$(rpm -q --queryformat '%{RELEASE}' kernel-uvm-devel) + UVM_KERNEL_HEADER_DIR="/usr/src/linux-headers-${UVM_KERNEL_VERSION}-${UVM_KERNEL_RELEASE}" +} diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh new file mode 100755 index 000000000000..16fe9657d6bc --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o pipefail +set -o errtrace + +[ -n "$DEBUG" ] && set -x + +AGENT_BUILD_TYPE=${AGENT_BUILD_TYPE:-release} +CONF_PODS=${CONF_PODS:-no} + +script_dir="$(dirname $(readlink -f $0))" +repo_dir="${script_dir}/../../../../" + +common_file="common.sh" +source "${common_file}" + +# these options ensure we produce the proper CLH config file +runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTCMD= DEFAULT_HYPERVISOR=cloud-hypervisor + DEFMEMSZ=256 
DEFSTATICSANDBOXWORKLOADMEM=1792 DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} PREFIX=${INSTALL_PATH_PREFIX}" + +# - for vanilla Kata we use the kernel binary. For ConfPods we use IGVM, so no need to provide kernel path. +# - for vanilla Kata we explicitly set DEFSTATICRESOURCEMGMT_CLH. For ConfPods, +# the variable DEFSTATICRESOURCEMGMT_TEE is used which defaults to false +# - for ConfPods we explicitly set the cloud-hypervisor path. The path is independent of the PREFIX variable +# as we have a single CLH binary for both vanilla Kata and ConfPods +if [ "${CONF_PODS}" == "no" ]; then + runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}" +else + runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}" +fi + +# On Mariner 3.0 we use cgroupsv2 with a single sandbox cgroup +if [ "${OS_VERSION}" == "3.0" ]; then + runtime_make_flags+=" DEFSANDBOXCGROUPONLY=true" +fi + +agent_make_flags="LIBC=gnu OPENSSL_NO_VENDOR=Y DESTDIR=${AGENT_INSTALL_DIR} BUILD_TYPE=${AGENT_BUILD_TYPE}" + +if [ "${CONF_PODS}" == "yes" ]; then + agent_make_flags+=" AGENT_POLICY=yes" +fi + +pushd "${repo_dir}" + +if [ "${CONF_PODS}" == "yes" ]; then + + echo "Building utarfs binary" + pushd src/utarfs/ + make all + popd + + echo "Building kata-overlay binary" + pushd src/overlay/ + make all + popd + + echo "Building tardev-snapshotter service binary" + pushd src/tardev-snapshotter/ + make all + popd +fi + +echo "Building shim binary and configuration" +pushd src/runtime/ +if [ "${CONF_PODS}" == "yes" ] || [ "${OS_VERSION}" == "3.0" ]; then + make ${runtime_make_flags} +else + # Mariner 2 pod sandboxing uses cgroupsv1 - note: cannot add the kernelparams in above assignments, + # leads to quotation issue. 
Hence, implementing the conditional check right here at the time of the make command + make ${runtime_make_flags} KERNELPARAMS="systemd.legacy_systemd_cgroup_controller=yes systemd.unified_cgroup_hierarchy=0" +fi +popd + +pushd src/runtime/config/ +echo "Creating shim debug configuration" +cp "${SHIM_CONFIG_FILE_NAME}" "${SHIM_DBG_CONFIG_FILE_NAME}" +sed -i '/^#enable_debug =/s|^#||g' "${SHIM_DBG_CONFIG_FILE_NAME}" +sed -i '/^#debug_console_enabled =/s|^#||g' "${SHIM_DBG_CONFIG_FILE_NAME}" + +if [ "${CONF_PODS}" == "yes" ]; then + echo "Adding debug igvm to SNP shim debug configuration" + sed -i "s|${IGVM_FILE_NAME}|${IGVM_DBG_FILE_NAME}|g" "${SHIM_DBG_CONFIG_FILE_NAME}" +fi +popd + +echo "Building agent binary and generating service files" +pushd src/agent/ +make ${agent_make_flags} +make install ${agent_make_flags} +popd + +popd diff --git a/tools/osbuilder/node-builder/azure-linux/package_install.sh b/tools/osbuilder/node-builder/azure-linux/package_install.sh new file mode 100755 index 000000000000..791cff5d92d2 --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/package_install.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o pipefail +set -o errtrace + +[ -n "$DEBUG" ] && set -x + +CONF_PODS=${CONF_PODS:-no} +PREFIX=${PREFIX:-} +SHIM_REDEPLOY_CONFIG=${SHIM_REDEPLOY_CONFIG:-yes} +SHIM_USE_DEBUG_CONFIG=${SHIM_USE_DEBUG_CONFIG:-no} +START_SERVICES=${START_SERVICES:-yes} + +script_dir="$(dirname $(readlink -f $0))" +repo_dir="${script_dir}/../../../../" + +common_file="common.sh" +source "${common_file}" + +pushd "${repo_dir}" + +echo "Creating target directories" +mkdir -p "${PREFIX}/${SHIM_CONFIG_PATH}" +mkdir -p "${PREFIX}/${DEBUGGING_BINARIES_PATH}" +mkdir -p "${PREFIX}/${SHIM_BINARIES_PATH}" + +if [ "${CONF_PODS}" == "yes" ]; then + echo "Installing tardev-snapshotter binaries and service file" + mkdir -p ${PREFIX}/usr/sbin + cp -a 
--backup=numbered src/utarfs/target/release/utarfs ${PREFIX}/usr/sbin/mount.tar + mkdir -p ${PREFIX}/usr/bin + cp -a --backup=numbered src/overlay/target/release/kata-overlay ${PREFIX}/usr/bin/ + cp -a --backup=numbered src/tardev-snapshotter/target/release/tardev-snapshotter ${PREFIX}/usr/bin/ + mkdir -p ${PREFIX}/usr/lib/systemd/system/ + cp -a --backup=numbered src/tardev-snapshotter/tardev-snapshotter.service ${PREFIX}/usr/lib/systemd/system/ + + echo "Enabling and starting snapshotter service" + if [ "${START_SERVICES}" == "yes" ]; then + systemctl enable tardev-snapshotter && systemctl daemon-reload && systemctl restart tardev-snapshotter + fi +fi + +echo "Installing diagnosability binaries (monitor, runtime, collect-data script)" +cp -a --backup=numbered src/runtime/kata-monitor "${PREFIX}/${DEBUGGING_BINARIES_PATH}" +cp -a --backup=numbered src/runtime/kata-runtime "${PREFIX}/${DEBUGGING_BINARIES_PATH}" +chmod +x src/runtime/data/kata-collect-data.sh +cp -a --backup=numbered src/runtime/data/kata-collect-data.sh "${PREFIX}/${DEBUGGING_BINARIES_PATH}" + +echo "Installing shim binary" +cp -a --backup=numbered src/runtime/containerd-shim-kata-v2 "${PREFIX}/${SHIM_BINARIES_PATH}"/"${SHIM_BINARY_NAME}" + +if [ "${SHIM_REDEPLOY_CONFIG}" == "yes" ]; then + echo "Installing shim configuration" + cp -a --backup=numbered src/runtime/config/"${SHIM_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_INST_FILE_NAME}" + cp -a --backup=numbered src/runtime/config/"${SHIM_DBG_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_DBG_CONFIG_INST_FILE_NAME}" + + if [ "${SHIM_USE_DEBUG_CONFIG}" == "yes" ]; then + # We simply override the release config with the debug config, + # which is probably fine when debugging. Not symlinking as that + # would create cycles the next time this script is called. 
+ echo "Overriding shim configuration with debug configuration" + cp -a --backup=numbered src/runtime/config/"${SHIM_DBG_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_INST_FILE_NAME}" + fi +else + echo "Skipping installation of shim configuration" +fi + +popd diff --git a/tools/osbuilder/node-builder/azure-linux/package_tools_install.sh b/tools/osbuilder/node-builder/azure-linux/package_tools_install.sh new file mode 100755 index 000000000000..a1f32ca88576 --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/package_tools_install.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o pipefail +set -o errtrace + +[ -n "$DEBUG" ] && set -x + +CONF_PODS=${CONF_PODS:-no} +PREFIX=${PREFIX:-} + +script_dir="$(dirname $(readlink -f $0))" +repo_dir="${script_dir}/../../../../" + +common_file="common.sh" +source "${common_file}" + +pushd "${repo_dir}" + +echo "Creating target directories" +mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_OSB}/scripts" +mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_OSB}/rootfs-builder/cbl-mariner" +mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_OSB}/image-builder" +mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/agent-install/usr/bin" +mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/agent-install/usr/lib/systemd/system" + +if [ "${CONF_PODS}" == "yes" ]; then + mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_SRC}/kata-opa" + mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_SRC}/tarfs" + mkdir -p "${PREFIX}/${UVM_TOOLS_PATH_OSB}/igvm-builder/azure-linux" +fi + +echo "Installing UVM build scripting" +cp -a --backup=numbered tools/osbuilder/Makefile "${PREFIX}/${UVM_TOOLS_PATH_OSB}/" +cp -a --backup=numbered tools/osbuilder/scripts/lib.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/scripts/" +cp -a --backup=numbered tools/osbuilder/rootfs-builder/rootfs.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/rootfs-builder/" +cp -a --backup=numbered 
tools/osbuilder/rootfs-builder/cbl-mariner/config.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/rootfs-builder/cbl-mariner/" +cp -a --backup=numbered tools/osbuilder/rootfs-builder/cbl-mariner/rootfs_lib.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/rootfs-builder/cbl-mariner/" +cp -a --backup=numbered tools/osbuilder/image-builder/image_builder.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/image-builder/" +cp -a --backup=numbered tools/osbuilder/image-builder/nsdax.gpl.c "${PREFIX}/${UVM_TOOLS_PATH_OSB}/image-builder/" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/Makefile "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/clean.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/common.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/uvm_build.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/uvm_install.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/" + +echo "Installing agent binary and service files" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/agent-install/usr/bin/kata-agent "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/agent-install/usr/bin/" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/agent-install/usr/lib/systemd/system/kata-containers.target "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/agent-install/usr/lib/systemd/system/" +cp -a --backup=numbered tools/osbuilder/node-builder/azure-linux/agent-install/usr/lib/systemd/system/kata-agent.service "${PREFIX}/${UVM_TOOLS_PATH_OSB}/node-builder/azure-linux/agent-install/usr/lib/systemd/system/" + +if [ "${CONF_PODS}" == "yes" ]; then + cp -a --backup=numbered src/kata-opa/allow-all.rego "${PREFIX}/${UVM_TOOLS_PATH_SRC}/kata-opa/" + cp 
-a --backup=numbered src/kata-opa/allow-set-policy.rego "${PREFIX}/${UVM_TOOLS_PATH_SRC}/kata-opa/" + cp -a --backup=numbered src/tarfs/Makefile "${PREFIX}/${UVM_TOOLS_PATH_SRC}/tarfs/" + cp -a --backup=numbered src/tarfs/tarfs.c "${PREFIX}/${UVM_TOOLS_PATH_SRC}/tarfs/" + cp -a --backup=numbered tools/osbuilder/igvm-builder/igvm_builder.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/igvm-builder/" + cp -a --backup=numbered tools/osbuilder/igvm-builder/azure-linux/config.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/igvm-builder/azure-linux/" + cp -a --backup=numbered tools/osbuilder/igvm-builder/azure-linux/igvm_lib.sh "${PREFIX}/${UVM_TOOLS_PATH_OSB}/igvm-builder/azure-linux/" +fi + +popd diff --git a/tools/osbuilder/node-builder/azure-linux/uvm_build.sh b/tools/osbuilder/node-builder/azure-linux/uvm_build.sh new file mode 100755 index 000000000000..c24b71e2e7d5 --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/uvm_build.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o pipefail +set -o errtrace + +[ -n "$DEBUG" ] && set -x + +AGENT_POLICY_FILE="${AGENT_POLICY_FILE:-allow-set-policy.rego}" +CONF_PODS=${CONF_PODS:-no} +IGVM_SVN=${IGVM_SVN:-0} + +script_dir="$(dirname $(readlink -f $0))" +repo_dir="${script_dir}/../../../../" + +agent_policy_file_abs="${repo_dir}/src/kata-opa/${AGENT_POLICY_FILE}" + +common_file="common.sh" +source "${common_file}" + +# This ensures that a pre-built agent binary is being injected into the rootfs +rootfs_make_flags="AGENT_SOURCE_BIN=${AGENT_INSTALL_DIR}/usr/bin/kata-agent OS_VERSION=${OS_VERSION}" + +if [ "${CONF_PODS}" == "yes" ]; then + rootfs_make_flags+=" AGENT_POLICY=yes CONF_GUEST=yes AGENT_POLICY_FILE=${agent_policy_file_abs}" +fi + +if [ "${CONF_PODS}" == "yes" ]; then + set_uvm_kernel_vars + if [ -z "${UVM_KERNEL_HEADER_DIR}" ]; then + exit 1 + fi +fi + +pushd "${repo_dir}" + +echo "Building rootfs and including pre-built agent 
binary" +pushd tools/osbuilder +# This command requires sudo because of dnf-installing packages into rootfs. As a suite, following commands require sudo as well as make clean +sudo -E PATH=$PATH make ${rootfs_make_flags} -B DISTRO=cbl-mariner rootfs +ROOTFS_PATH="$(readlink -f ./cbl-mariner_rootfs)" +popd + +echo "Installing agent service files into rootfs" +sudo cp ${AGENT_INSTALL_DIR}/usr/lib/systemd/system/kata-containers.target ${ROOTFS_PATH}/usr/lib/systemd/system/kata-containers.target +sudo cp ${AGENT_INSTALL_DIR}/usr/lib/systemd/system/kata-agent.service ${ROOTFS_PATH}/usr/lib/systemd/system/kata-agent.service + +if [ "${CONF_PODS}" == "yes" ]; then + echo "Building tarfs kernel driver and installing into rootfs" + pushd src/tarfs + make KDIR=${UVM_KERNEL_HEADER_DIR} + sudo make KDIR=${UVM_KERNEL_HEADER_DIR} KVER=${UVM_KERNEL_VERSION} INSTALL_MOD_PATH=${ROOTFS_PATH} install + popd + + echo "Building dm-verity protected image based on rootfs" + pushd tools/osbuilder + sudo -E PATH=$PATH make DISTRO=cbl-mariner MEASURED_ROOTFS=yes DM_VERITY_FORMAT=kernelinit IMAGE_SIZE_ALIGNMENT_MB=2 image + popd + + echo "Building IGVM and UVM measurement files" + pushd tools/osbuilder + sudo chmod o+r root_hash.txt + sudo make igvm DISTRO=cbl-mariner IGVM_SVN=${IGVM_SVN} + popd +else + echo "Building image based on rootfs" + pushd tools/osbuilder + sudo -E PATH=$PATH make DISTRO=cbl-mariner IMAGE_SIZE_ALIGNMENT_MB=2 image + popd +fi + +popd diff --git a/tools/osbuilder/node-builder/azure-linux/uvm_install.sh b/tools/osbuilder/node-builder/azure-linux/uvm_install.sh new file mode 100755 index 000000000000..09e2cfa386eb --- /dev/null +++ b/tools/osbuilder/node-builder/azure-linux/uvm_install.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2024 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o pipefail +set -o errtrace + +[ -n "$DEBUG" ] && set -x + +CONF_PODS=${CONF_PODS:-no} + +script_dir="$(dirname $(readlink -f $0))" 
+repo_dir="${script_dir}/../../../../" + +common_file="common.sh" +source "${common_file}" + +UVM_PATH=${UVM_PATH:-${UVM_PATH_DEFAULT}} + +pushd "${repo_dir}" + +pushd tools/osbuilder + +echo "Creating target directory" +mkdir -p "${UVM_PATH}" + +echo "Installing UVM files to target directory" +if [ "${CONF_PODS}" == "yes" ]; then + cp -a --backup=numbered "${IGVM_FILE_NAME}" "${UVM_PATH}" + cp -a --backup=numbered "${IGVM_DBG_FILE_NAME}" "${UVM_PATH}" + cp -a --backup=numbered "${UVM_MEASUREMENT_FILE_NAME}" "${UVM_PATH}" + cp -a --backup=numbered "${UVM_DBG_MEASUREMENT_FILE_NAME}" "${UVM_PATH}" +fi + +cp -a --backup=numbered "${IMG_FILE_NAME}" "${UVM_PATH}" + +popd + +popd From 7ddec33642c9a307d4af79078c2f15be887bec86 Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Tue, 31 Dec 2024 21:00:28 +0000 Subject: [PATCH 03/36] runtime: improved memory overhead management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After these changes: 1. The value of the K8s runtime class memory overhead: - Covers the memory usage from all the Host-side components (mainly the Kata Shim and the VMM). - Doesn't include the memory usage from any Guest-side components. 2. The value of a pod memory limit specified by the user: - Is equal to the memory size of the Pod VM. - Includes the memory usage from all the Guest-side components (mainly user's workload, the Guest kernel, and the Kata Agent) - Doesn't include the memory usage from any Host-side components. 
Signed-off-by: Dan Mihai runtime: fix `make test` This addresses the following errors from `make test` to allow us to require that upstream CI: https://github.com/microsoft/kata-containers/actions/runs/16656407213/job/47142422035?pr=392#step:13:53 Signed-off-by: Aurélien Bombo --- src/runtime/pkg/katautils/config.go | 5 ----- src/runtime/pkg/katautils/config_test.go | 4 ++-- src/runtime/virtcontainers/hypervisor.go | 2 +- tools/osbuilder/node-builder/azure-linux/package_build.sh | 2 +- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 44a5c865bee6..b69873d911bc 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -1996,11 +1996,6 @@ func checkHypervisorConfig(config vc.HypervisorConfig) error { } memSizeMB := int64(config.MemorySize) - - if memSizeMB == 0 { - return errors.New("VM memory cannot be zero") - } - mb := int64(1024 * 1024) for _, image := range images { diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 54a7127d7060..1415168976ce 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -943,7 +943,7 @@ func TestHypervisorDefaults(t *testing.T) { assert.Equal(h.machineType(), defaultMachineType, "default hypervisor machine type wrong") assert.Equal(h.defaultVCPUs(), float32(defaultVCPUCount), "default vCPU number is wrong") assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong") - assert.Equal(h.defaultMemSz(), defaultMemSize, "default memory size is wrong") + assert.Equal(h.defaultMemSz(), uint32(0), "default memory size is wrong") machineType := "foo" h.MachineType = machineType @@ -1526,7 +1526,7 @@ func TestCheckHypervisorConfig(t *testing.T) { // function, hence no test for it here. 
data := []testData{ - {"", "", 0, true, false}, + {"", "", 0, false, false}, {imageENOENT, "", 2, true, false}, {"", initrdENOENT, 2, true, false}, diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 5d0522351c48..4e1c70ea3813 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -84,7 +84,7 @@ const ( vSockLogsPort = 1025 // MinHypervisorMemory is the minimum memory required for a VM. - MinHypervisorMemory = 256 + MinHypervisorMemory = 0 defaultMsize9p = 8192 diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index 16fe9657d6bc..ce53530f2883 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -21,7 +21,7 @@ source "${common_file}" # these options ensure we produce the proper CLH config file runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTCMD= DEFAULT_HYPERVISOR=cloud-hypervisor - DEFMEMSZ=256 DEFSTATICSANDBOXWORKLOADMEM=1792 DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} PREFIX=${INSTALL_PATH_PREFIX}" + DEFMEMSZ=0 DEFSTATICSANDBOXWORKLOADMEM=512 DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} PREFIX=${INSTALL_PATH_PREFIX}" # - for vanilla Kata we use the kernel binary. For ConfPods we use IGVM, so no need to provide kernel path. # - for vanilla Kata we explicitly set DEFSTATICRESOURCEMGMT_CLH. 
For ConfPods, From 521def747f517693c83dd33775009e3c03db0aa4 Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Thu, 2 Jan 2025 22:49:49 +0000 Subject: [PATCH 04/36] runtime: Allocate default workload vcpus - similar to the static_sandbox_default_workload_mem option, assign a default number of vcpus to the VM when no limits are given, 1 vcpu in this case - similar to commit c7b8ee9, do not allocate additional vcpus when limits are provided Signed-off-by: Manuel Huber --- src/runtime/Makefile | 6 ++++-- src/runtime/config/configuration-clh.toml.in | 7 ++++++- src/runtime/pkg/katautils/config-settings.go.in | 2 +- src/runtime/pkg/katautils/config.go | 2 ++ src/runtime/pkg/oci/utils.go | 12 ++++++++++-- src/runtime/virtcontainers/hypervisor.go | 2 +- src/runtime/virtcontainers/sandbox.go | 2 ++ .../node-builder/azure-linux/package_build.sh | 2 +- 8 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 5a3a8f9ee01b..cc006a20db58 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -201,7 +201,7 @@ STRATOVIRTPATH = $(STRATOVIRTBINDIR)/$(STRATOVIRTCMD) STRATOVIRTVALIDHYPERVISORPATHS := [\"$(STRATOVIRTPATH)\"] # Default number of vCPUs -DEFVCPUS := 1 +DEFVCPUS ?= 1 # Default maximum number of vCPUs DEFMAXVCPUS := 0 # Default memory size in MiB @@ -286,8 +286,9 @@ DEFSTATICRESOURCEMGMT ?= false DEFSTATICRESOURCEMGMT_TEE = true DEFSTATICRESOURCEMGMT_NV = true -# Default memory for use for workloads within the sandbox if no specific workload memory value is requested +# Default memory and vcpus for use for workloads within the sandbox if no specific workload values are requested DEFSTATICSANDBOXWORKLOADMEM ?= 2048 +DEFSTATICSANDBOXWORKLOADVCPUS ?= 1 DEFDISABLEIMAGENVDIMM ?= false DEFDISABLEIMAGENVDIMM_NV = true @@ -784,6 +785,7 @@ USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFSTATICRESOURCEMGMT_NV USER_VARS += DEFSTATICSANDBOXWORKLOADMEM +USER_VARS += 
DEFSTATICSANDBOXWORKLOADVCPUS USER_VARS += DEFBINDMOUNTS USER_VARS += DEFCREATECONTAINERTIMEOUT USER_VARS += DEFDANCONF diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 267c06b24fde..babbfe8d9bcc 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -76,7 +76,7 @@ kernel_params = "@KERNELPARAMS@" # < 0 --> will be set to the actual number of physical cores # > 0 <= number of physical cores --> will be set to the specified number # > number of physical cores --> will be set to the actual number of physical cores -default_vcpus = 1 +default_vcpus = @DEFVCPUS@ # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number @@ -437,6 +437,11 @@ static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH@ # default amount of memory available within the sandbox. static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ +# If set, the runtime will use the value as the default number of vcpus for the sandbox when no workload vcpu request is passed +# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper +# default amount of vcpus available within the sandbox. +static_sandbox_default_workload_vcpus=@DEFSTATICSANDBOXWORKLOADVCPUS@ + # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. 
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 387671d79a90..88a40931deff 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -58,7 +58,7 @@ var systemdUnitName = "kata-containers.target" const defaultKernelParams = "" const defaultMachineType = "q35" -const defaultVCPUCount uint32 = 1 +const defaultVCPUCount uint32 = 0 const defaultMaxVCPUCount uint32 = 0 const defaultMemSize uint32 = 2048 // MiB const defaultMemSlots uint32 = 10 diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index b69873d911bc..f476ad35364b 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -197,6 +197,7 @@ type runtime struct { SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` + StaticSandboxWorkloadDefaultVcpus float32 `toml:"static_sandbox_default_workload_vcpus"` EnablePprof bool `toml:"enable_pprof"` DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` CreateContainerTimeout uint64 `toml:"create_container_timeout"` @@ -1616,6 +1617,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.StaticSandboxWorkloadDefaultMem = tomlConf.Runtime.StaticSandboxWorkloadDefaultMem + config.StaticSandboxWorkloadDefaultVcpus = tomlConf.Runtime.StaticSandboxWorkloadDefaultVcpus config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs config.EnablePprof = 
tomlConf.Runtime.EnablePprof diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 9535d3deae0b..df7f15c5717e 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -156,6 +156,9 @@ type RuntimeConfig struct { // Memory to allocate for workloads within the sandbox when workload memory is unspecified StaticSandboxWorkloadDefaultMem uint32 + // vcpus to allocate for workloads within the sandbox when workload vcpus is unspecified + StaticSandboxWorkloadDefaultVcpus float32 + // Determines if create a netns for hypervisor process DisableNewNetNs bool @@ -1201,6 +1204,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st StaticWorkloadDefaultMem: runtime.StaticSandboxWorkloadDefaultMem, + StaticWorkloadDefaultVcpus: runtime.StaticSandboxWorkloadDefaultVcpus, + ShmSize: shmSize, VfioMode: runtime.VfioMode, @@ -1231,11 +1236,14 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st // with the base number of CPU/memory (which is equal to the default CPU/memory specified for the runtime // configuration or annotations) as well as any specified workload resources. 
if sandboxConfig.StaticResourceMgmt { - // If no Limits are set in pod config, use StaticWorkloadDefaultMem to ensure the containers generally - // have a reasonable amount of memory available + // If no Limits are set in pod config, use StaticWorkloadDefaultMem/Vcpus to ensure the containers generally + // have a reasonable amount of resources available if sandboxConfig.SandboxResources.WorkloadMemMB == 0 { sandboxConfig.SandboxResources.WorkloadMemMB = sandboxConfig.StaticWorkloadDefaultMem } + if sandboxConfig.SandboxResources.WorkloadCPUs == 0 { + sandboxConfig.SandboxResources.WorkloadCPUs = sandboxConfig.StaticWorkloadDefaultVcpus + } sandboxConfig.SandboxResources.BaseCPUs = sandboxConfig.HypervisorConfig.NumVCPUsF sandboxConfig.SandboxResources.BaseMemMB = sandboxConfig.HypervisorConfig.MemorySize diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 4e1c70ea3813..5eb95b74412c 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -67,7 +67,7 @@ const ( procCPUInfo = "/proc/cpuinfo" - defaultVCPUs = float32(1) + defaultVCPUs = float32(0) // 2 GiB defaultMemSzMiB = 2048 diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 9e01461f9fd5..cc0ebc2f54ea 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -163,6 +163,8 @@ type SandboxConfig struct { StaticWorkloadDefaultMem uint32 + StaticWorkloadDefaultVcpus float32 + // Memory to allocate for workloads within the sandbox when workload memory is unspecified ShmSize uint64 diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index ce53530f2883..fb93eec197f0 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -21,7 +21,7 @@ source "${common_file}" # these options ensure 
we produce the proper CLH config file runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTCMD= DEFAULT_HYPERVISOR=cloud-hypervisor - DEFMEMSZ=0 DEFSTATICSANDBOXWORKLOADMEM=512 DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} PREFIX=${INSTALL_PATH_PREFIX}" + DEFMEMSZ=0 DEFSTATICSANDBOXWORKLOADMEM=512 DEFVCPUS=0 DEFSTATICSANDBOXWORKLOADVCPUS=1 DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} PREFIX=${INSTALL_PATH_PREFIX}" # - for vanilla Kata we use the kernel binary. For ConfPods we use IGVM, so no need to provide kernel path. # - for vanilla Kata we explicitly set DEFSTATICRESOURCEMGMT_CLH. For ConfPods, From f38ae83840b138999c1ec1fcc53f0df049e80694 Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Wed, 30 Jul 2025 13:09:17 -0700 Subject: [PATCH 05/36] ci: Update codeql.yml with proper branches Point to msft-preview Signed-off-by: Manuel Huber Signed-off-by: Saul Paredes --- .github/workflows/codeql.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 4730cc74d0c8..e3209b1b1ae8 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -13,9 +13,9 @@ name: "CodeQL Advanced" on: push: - branches: [ "main" ] + branches: [ "msft-preview", "release/*" ] pull_request: - branches: [ "main" ] + branches: [ "msft-preview", "release/*" ] schedule: - cron: '45 0 * * 1' From c06b4704fbb730106341b33ce55ae2198ee300e4 Mon Sep 17 00:00:00 2001 From: Cameron Baird Date: Tue, 29 Jul 2025 21:44:59 +0000 Subject: [PATCH 06/36] runtime: Enforce that OCI memory limit exceeds 128MB baseline For our Kata UVM, we know we need at least 128MB of memory to prevent instability in the guest. Enforce this constraint with a descriptive error to prevent users from destabilizing the UVM with faulty k8s configurations. 
Signed-off-by: Cameron Baird --- src/runtime/Makefile | 6 ++++++ src/runtime/config/configuration-clh.toml.in | 5 +++++ src/runtime/pkg/katautils/config.go | 2 ++ src/runtime/pkg/oci/utils.go | 7 +++++++ tools/osbuilder/node-builder/azure-linux/package_build.sh | 4 ++-- 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index cc006a20db58..094bdb722974 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -290,6 +290,11 @@ DEFSTATICRESOURCEMGMT_NV = true DEFSTATICSANDBOXWORKLOADMEM ?= 2048 DEFSTATICSANDBOXWORKLOADVCPUS ?= 1 +# If set, the runtime will enforce that pods deployed in a sandbox +# explicitly setting memory limits using resources.limits.memory +# allow at least this amount of memory in MiB so that the sandbox can properly start. +DEFSANDBOXWORKLOADMEMMIN ?= 128 + DEFDISABLEIMAGENVDIMM ?= false DEFDISABLEIMAGENVDIMM_NV = true DEFDISABLEIMAGENVDIMM_CLH ?= true @@ -785,6 +790,7 @@ USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFSTATICRESOURCEMGMT_NV USER_VARS += DEFSTATICSANDBOXWORKLOADMEM +USER_VARS += DEFSANDBOXWORKLOADMEMMIN USER_VARS += DEFSTATICSANDBOXWORKLOADVCPUS USER_VARS += DEFBINDMOUNTS USER_VARS += DEFCREATECONTAINERTIMEOUT diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index babbfe8d9bcc..3d3cb5ca0c2a 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -442,6 +442,11 @@ static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ # default amount of vcpus available within the sandbox. static_sandbox_default_workload_vcpus=@DEFSTATICSANDBOXWORKLOADVCPUS@ +# The runtime will enforce that pods deployed in a sandbox +# explicitly setting memory limits using resources.limits.memory +# allow at least this amount of memory in MiB so that the sandbox can properly start. 
+sandbox_workload_mem_min=@DEFSANDBOXWORKLOADMEMMIN@ + # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index f476ad35364b..6ff340b0c136 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -198,6 +198,7 @@ type runtime struct { StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` StaticSandboxWorkloadDefaultVcpus float32 `toml:"static_sandbox_default_workload_vcpus"` + SandboxWorkloadMemMin uint32 `toml:"sandbox_workload_mem_min"` EnablePprof bool `toml:"enable_pprof"` DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` CreateContainerTimeout uint64 `toml:"create_container_timeout"` @@ -1617,6 +1618,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.StaticSandboxWorkloadDefaultMem = tomlConf.Runtime.StaticSandboxWorkloadDefaultMem + config.SandboxWorkloadMemMin = tomlConf.Runtime.SandboxWorkloadMemMin config.StaticSandboxWorkloadDefaultVcpus = tomlConf.Runtime.StaticSandboxWorkloadDefaultVcpus config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index df7f15c5717e..f5e74169aed0 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -159,6 +159,9 @@ type RuntimeConfig struct { // 
vcpus to allocate for workloads within the sandbox when workload vcpus is unspecified StaticSandboxWorkloadDefaultVcpus float32 + // Minimum memory (in MiB) to enforce is allocated for workloads within the sandbox when workload memory is specified + SandboxWorkloadMemMin uint32 + // Determines if create a netns for hypervisor process DisableNewNetNs bool @@ -1262,6 +1265,10 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st } + if sandboxConfig.SandboxResources.WorkloadMemMB < runtime.SandboxWorkloadMemMin { + return vc.SandboxConfig{}, fmt.Errorf("pod memory limit too low: minimum %dMiB, got %dMiB", runtime.SandboxWorkloadMemMin, sandboxConfig.SandboxResources.WorkloadMemMB) + } + return sandboxConfig, nil } diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index fb93eec197f0..346ba5a9f092 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -29,9 +29,9 @@ runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTC # - for ConfPods we explicitly set the cloud-hypervisor path. 
The path is independent of the PREFIX variable # as we have a single CLH binary for both vanilla Kata and ConfPods if [ "${CONF_PODS}" == "no" ]; then - runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}" + runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION} DEFSANDBOXWORKLOADMEMMIN=128" else - runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}" + runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION} DEFSANDBOXWORKLOADMEMMIN=192" fi # On Mariner 3.0 we use cgroupsv2 with a single sandbox cgroup From 2f5c8145380ff15cd5fc5987ce8663d626975bd6 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Thu, 9 Jan 2025 15:25:51 -0800 Subject: [PATCH 07/36] webhook: enforce minimum memory limit If memory limit is set and less than minimum, set it to minimum. This is to account for https://github.com/kata-containers/kata-containers/commit/0ec34036bbf691f075964c41f3a02fa63b009817 Signed-off-by: Saul Paredes --- tools/testing/kata-webhook/deploy/webhook.yaml | 9 ++++++++- tools/testing/kata-webhook/main.go | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tools/testing/kata-webhook/deploy/webhook.yaml b/tools/testing/kata-webhook/deploy/webhook.yaml index efd7d8225a8c..cb4283e787fd 100644 --- a/tools/testing/kata-webhook/deploy/webhook.yaml +++ b/tools/testing/kata-webhook/deploy/webhook.yaml @@ -20,7 +20,7 @@ spec: spec: containers: - name: pod-annotate-webhook - image: quay.io/kata-containers/kata-webhook-example:latest + image: marineraks.azurecr.io/kata-containers/kata-webhook:min_memory_limit imagePullPolicy: Always env: - name: RUNTIME_CLASS @@ -29,6 +29,12 @@ spec: name: kata-webhook key: runtime_class optional: true + - name: MIN_MEMORY_LIMIT + valueFrom: + configMapKeyRef: + name: kata-webhook + key: min_memory_limit + optional: true args: - -tls-cert-file=/etc/webhook/certs/cert.pem - -tls-key-file=/etc/webhook/certs/key.pem @@ -74,3 +80,4
@@ metadata: name: kata-webhook data: runtime_class: kata + min_memory_limit: "128Mi" diff --git a/tools/testing/kata-webhook/main.go b/tools/testing/kata-webhook/main.go index 6f3f8bd1c56d..63d0e477fea4 100644 --- a/tools/testing/kata-webhook/main.go +++ b/tools/testing/kata-webhook/main.go @@ -14,6 +14,7 @@ import ( "strings" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/sirupsen/logrus" @@ -75,6 +76,23 @@ func annotatePodMutator(_ context.Context, ar *kwhmodel.AdmissionReview, obj met kataRuntimeClassName := getRuntimeClass(runtimeClassEnvKey, "kata") pod.Spec.RuntimeClassName = &kataRuntimeClassName + minMemoryLimit, foundMinMemoryLimit := os.LookupEnv("MIN_MEMORY_LIMIT") + + if foundMinMemoryLimit { + minMemoryLimitVal := resource.MustParse(minMemoryLimit) + for i := range pod.Spec.Containers { + if pod.Spec.Containers[i].Resources.Limits == nil { + continue + } else { + currentMemoryLimit := pod.Spec.Containers[i].Resources.Limits.Memory().Value() + if currentMemoryLimit < minMemoryLimitVal.Value() { + pod.Spec.Containers[i].Resources.Limits["memory"] = resource.MustParse(minMemoryLimit) + fmt.Println("memory limit too low. 
Updating to : ", pod.Spec.Containers[i].Resources.Limits) + } + } + } + } + return &kwhmutating.MutatorResult{ MutatedObject: pod, }, nil From 4e81837f214a89e77031ba844b8efb79db441beb Mon Sep 17 00:00:00 2001 From: "microsoft-github-policy-service[bot]" <77245923+microsoft-github-policy-service[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 19:41:34 +0000 Subject: [PATCH 08/36] docs: add Microsoft mandatory file Add Microsoft mandatory file SECURITY.md Signed-off-by: Saul Paredes --- SECURITY.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000000..b3c89efc852e --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 
+ +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 
+ + From 9ef2bb6d37b0587ac68fb6d73ed3b10fca46e591 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Tue, 10 Mar 2026 16:12:50 -0700 Subject: [PATCH 09/36] runtime: clh: update cloud-hypervisor API reference - Change Makefile to point to fork - Change versions.yaml to point to proper version on fork Signed-off-by: Saul Paredes --- src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile | 2 +- .../virtcontainers/pkg/cloud-hypervisor/client/README.md | 4 ++++ versions.yaml | 6 +++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile b/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile index bf5c241ccf6d..d9e20472cbf1 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile @@ -26,7 +26,7 @@ ifndef YQ $(MK_DIR)/../../../../../ci//install_yq.sh endif clh_version=$(shell yq .assets.hypervisor.cloud_hypervisor.version $(VERSIONS_FILE)); \ - curl -OL https://raw.githubusercontent.com/cloud-hypervisor/cloud-hypervisor/$$clh_version/vmm/src/api/openapi/cloud-hypervisor.yaml + curl -OL https://raw.githubusercontent.com/microsoft/cloud-hypervisor/refs/tags/msft/$$clh_version/vmm/src/api/openapi/cloud-hypervisor.yaml clean-generated-code: rm "./client" -rf diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md index e2b529582305..20bdbf5c1558 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md @@ -16,6 +16,7 @@ Install the following dependencies: ```shell go get github.com/stretchr/testify/assert go get golang.org/x/oauth2 +go get golang.org/x/net/context ``` Put the package under your project folder and add the following in import: @@ -176,3 +177,6 @@ Each of these functions takes a value of the given basic type and returns a poin * `PtrTime` 
## Author + + + diff --git a/versions.yaml b/versions.yaml index 27336c41aec5..b950013f4485 100644 --- a/versions.yaml +++ b/versions.yaml @@ -72,10 +72,10 @@ assets: cloud_hypervisor: description: "Cloud Hypervisor is an open source Virtual Machine Monitor" - url: "https://github.com/cloud-hypervisor/cloud-hypervisor" + url: "https://github.com/microsoft/cloud-hypervisor" uscan-url: >- - https://github.com/cloud-hypervisor/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz - version: "v48.0" + https://github.com/microsoft/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz + version: "v48.0.246" firecracker: description: "Firecracker micro-VMM" From a136359ce5eea316f5b89889b2a7ea93736ad767 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Tue, 19 Aug 2025 12:30:42 -0700 Subject: [PATCH 10/36] network: preseed default-gateway neighbor This change mirrors host networking into the guest as before, but now also includes the default gateway neighbor entry for each interface. Pods using overlay/synthetic gateways (e.g., 169.254.1.1) can hit a first-connect race while the guest performs the initial ARP. Preseeding the gateway neighbor removes that latency and makes early connections (e.g., to the API Service) deterministic. 
Signed-off-by: Saul Paredes --- src/runtime/virtcontainers/network.go | 7 ++-- src/runtime/virtcontainers/network_linux.go | 38 +++++++++++++++++++-- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/runtime/virtcontainers/network.go b/src/runtime/virtcontainers/network.go index 0960d47d3706..9d6a8faedeb9 100644 --- a/src/runtime/virtcontainers/network.go +++ b/src/runtime/virtcontainers/network.go @@ -315,13 +315,16 @@ func generateVCNetworkStructures(ctx context.Context, endpoints []Endpoint) ([]* routes = append(routes, &r) } + gatewaySet := gatewaySetFromRoutes(endpoint.Properties().Routes) + for _, neigh := range endpoint.Properties().Neighbors { - var n pbTypes.ARPNeighbor - if !validGuestNeighbor(neigh) { + if !validGuestNeighbor(neigh, gatewaySet) { continue } + var n pbTypes.ARPNeighbor + n.Device = endpoint.Name() n.State = int32(neigh.State) n.Flags = int32(neigh.Flags) diff --git a/src/runtime/virtcontainers/network_linux.go b/src/runtime/virtcontainers/network_linux.go index 3f4b419d22c1..d21c38a2a068 100644 --- a/src/runtime/virtcontainers/network_linux.go +++ b/src/runtime/virtcontainers/network_linux.go @@ -1614,7 +1614,39 @@ func validGuestRoute(route netlink.Route) bool { return route.Protocol != unix.RTPROT_KERNEL } -func validGuestNeighbor(neigh netlink.Neigh) bool { - // We add only static ARP entries - return neigh.State == netlink.NUD_PERMANENT +// neighbor is valid if it is static or a default-gateway +func validGuestNeighbor(neigh netlink.Neigh, gatewaySet map[string]struct{}) bool { + // need a MAC for the guest + if neigh.HardwareAddr == nil { + return false + } + // Keep all static entries + if neigh.State == netlink.NUD_PERMANENT { + return true + } + // Gateway-only exception: allow the default-gateway IP: + // On some setups, the pod subnet gateway does not appear in the host ARP cache as a static entry. 
+ // On these setups an ARP request storm happens when many Kata PODs are started at the same time and they all look for the gateway MAC address. + // This forces the gateway to churn a lot of ARP requests and render the ARP request full, hence dropping some ARP requests. + // Manually pre-populating the ARP entry in the UVM guest ARP cache for that gateway solves that problem. + _, isGw := gatewaySet[neigh.IP.String()] + return isGw +} + +// helper: default routes => set of gateway IP strings +func gatewaySetFromRoutes(routes []netlink.Route) map[string]struct{} { + gatewaySet := make(map[string]struct{}) + for _, route := range routes { + if route.Gw == nil { + continue + } + if route.Dst == nil { + gatewaySet[route.Gw.String()] = struct{}{} + continue + } + if ones, _ := route.Dst.Mask.Size(); ones == 0 { // 0.0.0.0/0 or ::/0 + gatewaySet[route.Gw.String()] = struct{}{} + } + } + return gatewaySet } From e58badb90231dc1e7fd4e3b157b2d272b46efd1b Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Tue, 24 Mar 2026 14:03:41 -0700 Subject: [PATCH 11/36] ci: replace 'main' with 'msft-preview' This is a temporary measure to unblock the required CI tests in our fork, while we find a way to remove the hard-coded 'main' references from upstream.
Signed-off-by: Saul Paredes --- .github/workflows/build-kubectl-image.yaml | 2 +- .github/workflows/ci-on-push.yaml | 2 +- .github/workflows/commit-message-check.yaml | 2 +- .github/workflows/docs.yaml | 2 +- .github/workflows/osv-scanner.yaml | 4 ++-- .github/workflows/payload-after-push.yaml | 2 +- .github/workflows/push-oras-tarball-cache.yaml | 4 ++-- .github/workflows/release-amd64.yaml | 2 +- .github/workflows/release-arm64.yaml | 2 +- .github/workflows/release-ppc64le.yaml | 2 +- .github/workflows/release-s390x.yaml | 2 +- .github/workflows/scorecard.yaml | 4 ++-- 12 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-kubectl-image.yaml b/.github/workflows/build-kubectl-image.yaml index f685d552b5a4..b36ed2834d77 100644 --- a/.github/workflows/build-kubectl-image.yaml +++ b/.github/workflows/build-kubectl-image.yaml @@ -8,7 +8,7 @@ on: # Allow manual triggering push: branches: - - main + - msft-preview paths: - 'tools/packaging/kubectl/Dockerfile' - '.github/workflows/build-kubectl-image.yaml' diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml index e9ca81a0b482..b45760181f61 100644 --- a/.github/workflows/ci-on-push.yaml +++ b/.github/workflows/ci-on-push.yaml @@ -2,7 +2,7 @@ name: Kata Containers CI on: pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332. branches: - - 'main' + - 'msft-preview' types: # Adding 'labeled' to the list of activity types that trigger this event # (default: opened, synchronize, reopened) so that we can run this diff --git a/.github/workflows/commit-message-check.yaml b/.github/workflows/commit-message-check.yaml index 469dd91bad4c..0f4695d3a6bf 100644 --- a/.github/workflows/commit-message-check.yaml +++ b/.github/workflows/commit-message-check.yaml @@ -16,7 +16,7 @@ env: error_msg: |+ See the document below for help on formatting commits for the project. 
- https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md#patch-format + https://github.com/kata-containers/community/blob/msft-preview/CONTRIBUTING.md#patch-format jobs: commit-message-check: diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index e101d1ee144e..9ede371b4fa1 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -2,7 +2,7 @@ name: Documentation on: push: branches: - - main + - msft-preview permissions: {} jobs: deploy-docs: diff --git a/.github/workflows/osv-scanner.yaml b/.github/workflows/osv-scanner.yaml index 7c3c730e118b..4413d74739c4 100644 --- a/.github/workflows/osv-scanner.yaml +++ b/.github/workflows/osv-scanner.yaml @@ -9,11 +9,11 @@ name: OSV-Scanner on: workflow_dispatch: pull_request: - branches: [ "main" ] + branches: [ "msft-preview" ] schedule: - cron: '0 1 * * 0' push: - branches: [ "main" ] + branches: [ "msft-preview" ] permissions: {} diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml index 891ccea029fc..4a88dc139107 100644 --- a/.github/workflows/payload-after-push.yaml +++ b/.github/workflows/payload-after-push.yaml @@ -2,7 +2,7 @@ name: CI | Publish Kata Containers payload on: push: branches: - - main + - msft-preview workflow_dispatch: permissions: {} diff --git a/.github/workflows/push-oras-tarball-cache.yaml b/.github/workflows/push-oras-tarball-cache.yaml index 1243f32959d8..a52fb3ce3894 100644 --- a/.github/workflows/push-oras-tarball-cache.yaml +++ b/.github/workflows/push-oras-tarball-cache.yaml @@ -1,11 +1,11 @@ # Push gperf and busybox tarballs to the ORAS cache (ghcr.io) so that # download-with-oras-cache.sh can pull them instead of hitting upstream. -# Runs when versions.yaml changes on main (e.g. after a PR merge) or manually. +# Runs when versions.yaml changes on msft-preview (e.g. after a PR merge) or manually. 
name: CI | Push ORAS tarball cache on: push: branches: - - main + - msft-preview paths: - 'versions.yaml' workflow_dispatch: diff --git a/.github/workflows/release-amd64.yaml b/.github/workflows/release-amd64.yaml index 9707260d7dba..fb1a47c8836b 100644 --- a/.github/workflows/release-amd64.yaml +++ b/.github/workflows/release-amd64.yaml @@ -66,7 +66,7 @@ jobs: # We need to do such trick here as the format of the $GITHUB_REF # is "refs/tags/" tag=$(echo "$GITHUB_REF" | cut -d/ -f3-) - if [ "${tag}" = "main" ]; then + if [ "${tag}" = "msft-preview" ]; then tag=$(./tools/packaging/release/release.sh release-version) tags=("${tag}" "latest") else diff --git a/.github/workflows/release-arm64.yaml b/.github/workflows/release-arm64.yaml index e81d83d54f74..874d22e2f063 100644 --- a/.github/workflows/release-arm64.yaml +++ b/.github/workflows/release-arm64.yaml @@ -66,7 +66,7 @@ jobs: # We need to do such trick here as the format of the $GITHUB_REF # is "refs/tags/" tag=$(echo "$GITHUB_REF" | cut -d/ -f3-) - if [ "${tag}" = "main" ]; then + if [ "${tag}" = "msft-preview" ]; then tag=$(./tools/packaging/release/release.sh release-version) tags=("${tag}" "latest") else diff --git a/.github/workflows/release-ppc64le.yaml b/.github/workflows/release-ppc64le.yaml index 2a63579ff9ee..4260554e87a2 100644 --- a/.github/workflows/release-ppc64le.yaml +++ b/.github/workflows/release-ppc64le.yaml @@ -63,7 +63,7 @@ jobs: # We need to do such trick here as the format of the $GITHUB_REF # is "refs/tags/" tag=$(echo "$GITHUB_REF" | cut -d/ -f3-) - if [ "${tag}" = "main" ]; then + if [ "${tag}" = "msft-preview" ]; then tag=$(./tools/packaging/release/release.sh release-version) tags=("${tag}" "latest") else diff --git a/.github/workflows/release-s390x.yaml b/.github/workflows/release-s390x.yaml index 8f86d63e70fc..585add2e113b 100644 --- a/.github/workflows/release-s390x.yaml +++ b/.github/workflows/release-s390x.yaml @@ -67,7 +67,7 @@ jobs: # We need to do such trick here as the format 
of the $GITHUB_REF # is "refs/tags/" tag=$(echo "$GITHUB_REF" | cut -d/ -f3-) - if [ "${tag}" = "main" ]; then + if [ "${tag}" = "msft-preview" ]; then tag=$(./tools/packaging/release/release.sh release-version) tags=("${tag}" "latest") else diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index bbe22d5a826f..93e0994d62ab 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -5,10 +5,10 @@ name: Scorecard supply-chain security on: # For Branch-Protection check. Only the default branch is supported. See - # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + # https://github.com/ossf/scorecard/blob/msft-preview/docs/checks.md#branch-protection branch_protection_rule: push: - branches: [ "main" ] + branches: [ "msft-preview" ] workflow_dispatch: permissions: {} From 2cb2026d17c0e526ddef86a51a5bbd9d7535e170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Wed, 10 Sep 2025 15:30:18 -0500 Subject: [PATCH 12/36] ci: security: Use pull_request instead of pull_request_target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Background: * `pull_request` runs on the PR branch code and has access to secrets ONLY if the PR is from microsoft/kata-containers (i.e. NOT from an external contributor who forked the repo). * `pull_request_target` runs on the trusted main branch code by default and has access to secrets for any PR. Reference: https://docs.github.com/en/actions/reference/workflows-and-actions/events-that-trigger-workflows#pull_request Upstream uses `pull_request_target` (and manually checks out the PR code) to have access to secrets for PRs from external contributors, however we don't expect external PRs, hence we can use `pull_request`. 
Furthermore, since `pull_request_target` only runs from the default branch, we need to use `pull_request` anyway as we have multiple leading branches (i.e., msft-main, msft-preview, and release branches). https://github.blog/changelog/2025-11-07-actions-pull_request_target-and-environment-branch-protections-changes/ Signed-off-by: Aurélien Bombo --- .github/workflows/ci-on-push.yaml | 10 ++++++++-- .github/workflows/gatekeeper.yaml | 8 +++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml index b45760181f61..c4073f8f3a63 100644 --- a/.github/workflows/ci-on-push.yaml +++ b/.github/workflows/ci-on-push.yaml @@ -1,13 +1,19 @@ name: Kata Containers CI on: - pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332. + # Upstream uses `pull_request_target` to have access to secrets for + # PRs from forks but: + # (1) `pull_request_target` only runs on the default branch and we + # have multiple leading branches, so we need to use `pull_request`. + # (2) We can use `pull_request` practically since we don't expect PRs + # from external contributors. + pull_request: branches: - 'msft-preview' types: # Adding 'labeled' to the list of activity types that trigger this event # (default: opened, synchronize, reopened) so that we can run this # workflow when the 'ok-to-test' label is added. - # Reference: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target + # Reference: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request - opened - synchronize - reopened diff --git a/.github/workflows/gatekeeper.yaml b/.github/workflows/gatekeeper.yaml index 9854f3b5ea1c..b930e3592580 100644 --- a/.github/workflows/gatekeeper.yaml +++ b/.github/workflows/gatekeeper.yaml @@ -5,7 +5,13 @@ name: Gatekeeper # reporting the status. on: - pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332. 
+ # Upstream uses `pull_request_target` to have access to secrets for + # PRs from forks but: + # (1) `pull_request_target` only runs on the default branch and we + # have multiple leading branches, so we need to use `pull_request`. + # (2) We can use `pull_request` practically since we don't expect PRs + # from external contributors. + pull_request: types: - opened - synchronize From 5ffbdea174fa28fdc7c29be7c7b484619f698c85 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Tue, 24 Mar 2026 18:13:09 -0700 Subject: [PATCH 13/36] gatekeeper: set default to msft-preview set default to msft-preview Signed-off-by: Saul Paredes --- tools/testing/gatekeeper/skips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/gatekeeper/skips.py b/tools/testing/gatekeeper/skips.py index d639f9be8a20..0d1058d1b2e4 100644 --- a/tools/testing/gatekeeper/skips.py +++ b/tools/testing/gatekeeper/skips.py @@ -106,4 +106,4 @@ def get_features(self, target_branch): _TESTS = sys.argv[1] == '-t' else: _TESTS = False - sys.exit(Checks().run(_TESTS, os.getenv("TARGET_BRANCH", "main"))) + sys.exit(Checks().run(_TESTS, os.getenv("TARGET_BRANCH", "msft-preview"))) From d1351d3a16a51b9d8f66545ad40bf574821afa23 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Wed, 25 Mar 2026 10:20:47 -0700 Subject: [PATCH 14/36] versions: use upstream cloud-hypervisor use upstream cloud-hypervisor. 
This is to unblock the CI and let CLH build Signed-off-by: Saul Paredes --- src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile | 2 +- versions.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile b/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile index d9e20472cbf1..bf5c241ccf6d 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/Makefile @@ -26,7 +26,7 @@ ifndef YQ $(MK_DIR)/../../../../../ci//install_yq.sh endif clh_version=$(shell yq .assets.hypervisor.cloud_hypervisor.version $(VERSIONS_FILE)); \ - curl -OL https://raw.githubusercontent.com/microsoft/cloud-hypervisor/refs/tags/msft/$$clh_version/vmm/src/api/openapi/cloud-hypervisor.yaml + curl -OL https://raw.githubusercontent.com/cloud-hypervisor/cloud-hypervisor/$$clh_version/vmm/src/api/openapi/cloud-hypervisor.yaml clean-generated-code: rm "./client" -rf diff --git a/versions.yaml b/versions.yaml index b950013f4485..27336c41aec5 100644 --- a/versions.yaml +++ b/versions.yaml @@ -72,10 +72,10 @@ assets: cloud_hypervisor: description: "Cloud Hypervisor is an open source Virtual Machine Monitor" - url: "https://github.com/microsoft/cloud-hypervisor" + url: "https://github.com/cloud-hypervisor/cloud-hypervisor" uscan-url: >- - https://github.com/microsoft/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz - version: "v48.0.246" + https://github.com/cloud-hypervisor/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz + version: "v48.0" firecracker: description: "Firecracker micro-VMM" From 491964474be3bb94c2510c1d2e6cc2ee9cd82fb5 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Wed, 25 Mar 2026 10:22:52 -0700 Subject: [PATCH 15/36] static-checks: update target branch to msft-preview update target branch to msft-preview Signed-off-by: Saul Paredes --- .github/workflows/static-checks.yaml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index fe605e632d8e..aabe047e2a34 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -110,6 +110,7 @@ jobs: - "make static-checks" env: GOPATH: ${{ github.workspace }} + target_branch: msft-preview permissions: contents: read # for checkout packages: write # for push to ghcr.io From b27d1964979efbe6c95dabadee00645a7c5c5f44 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Wed, 25 Mar 2026 11:10:20 -0700 Subject: [PATCH 16/36] runtime: run gofmt This fixes a CI static check failure Signed-off-by: Saul Paredes --- src/runtime/pkg/katautils/config.go | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 6ff340b0c136..63f182278529 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -181,30 +181,30 @@ type hypervisor struct { } type runtime struct { - InterNetworkModel string `toml:"internetworking_model"` - JaegerEndpoint string `toml:"jaeger_endpoint"` - JaegerUser string `toml:"jaeger_user"` - JaegerPassword string `toml:"jaeger_password"` - VfioMode string `toml:"vfio_mode"` - GuestSeLinuxLabel string `toml:"guest_selinux_label"` - SandboxBindMounts []string `toml:"sandbox_bind_mounts"` - Experimental []string `toml:"experimental"` - Tracing bool `toml:"enable_tracing"` - DisableNewNetNs bool `toml:"disable_new_netns"` - DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` - EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` - Debug bool `toml:"enable_debug"` - SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` - StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` - StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` + InterNetworkModel string `toml:"internetworking_model"` + JaegerEndpoint string `toml:"jaeger_endpoint"` + 
JaegerUser string `toml:"jaeger_user"` + JaegerPassword string `toml:"jaeger_password"` + VfioMode string `toml:"vfio_mode"` + GuestSeLinuxLabel string `toml:"guest_selinux_label"` + SandboxBindMounts []string `toml:"sandbox_bind_mounts"` + Experimental []string `toml:"experimental"` + Tracing bool `toml:"enable_tracing"` + DisableNewNetNs bool `toml:"disable_new_netns"` + DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` + EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` + Debug bool `toml:"enable_debug"` + SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` + StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` + StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` StaticSandboxWorkloadDefaultVcpus float32 `toml:"static_sandbox_default_workload_vcpus"` SandboxWorkloadMemMin uint32 `toml:"sandbox_workload_mem_min"` - EnablePprof bool `toml:"enable_pprof"` - DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` - CreateContainerTimeout uint64 `toml:"create_container_timeout"` - DanConf string `toml:"dan_conf"` - ForceGuestPull bool `toml:"experimental_force_guest_pull"` - PodResourceAPISock string `toml:"pod_resource_api_sock"` + EnablePprof bool `toml:"enable_pprof"` + DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` + CreateContainerTimeout uint64 `toml:"create_container_timeout"` + DanConf string `toml:"dan_conf"` + ForceGuestPull bool `toml:"experimental_force_guest_pull"` + PodResourceAPISock string `toml:"pod_resource_api_sock"` } type agent struct { From 028a2dcd2581446503f3217a6d79438fb1e6406d Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Wed, 25 Mar 2026 14:37:11 -0700 Subject: [PATCH 17/36] tests: disable tests that are not supported yet in our fork - tests that deploy pods with too small of a memory limit - try to set a minimum memory limit for some containerd tests - tests that use runners we don't have - tests that depend on pushing to GHCR Signed-off-by: Saul Paredes --- 
.github/workflows/ci.yaml | 11 +- .../workflows/static-checks-self-hosted.yaml | 2 +- .../cri-containerd/integration-tests.sh | 9 ++ tests/integration/nydus/nydus-sandbox.yaml | 3 + tools/testing/gatekeeper/required-tests.yaml | 129 ++++++++++-------- 5 files changed, 91 insertions(+), 63 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1c8d849d625b..deb9d6457e0e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -108,6 +108,7 @@ jobs: QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} build-kata-static-tarball-s390x: + if: false # msft-preview doesn't have these runners permissions: contents: read packages: write @@ -123,6 +124,7 @@ jobs: QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} build-kata-static-tarball-ppc64le: + if: false # msft-preview doesn't have these runners permissions: contents: read packages: write @@ -172,6 +174,7 @@ jobs: build-and-publish-tee-confidential-unencrypted-image: name: build-and-publish-tee-confidential-unencrypted-image + if: false # msft-preview can't push to GHCR permissions: contents: read packages: write @@ -277,7 +280,7 @@ jobs: target-branch: ${{ inputs.target-branch }} run-k8s-tests-on-aks: - if: ${{ inputs.skip-test != 'yes' }} + if: false # msft-preview doesn't test on AKS yet needs: publish-kata-deploy-payload-amd64 uses: ./.github/workflows/run-k8s-tests-on-aks.yaml @@ -298,7 +301,7 @@ jobs: AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }} run-k8s-tests-on-arm64: - if: ${{ inputs.skip-test != 'yes' }} + if: false # msft-preview doesn't have these runners needs: publish-kata-deploy-payload-arm64 uses: ./.github/workflows/run-k8s-tests-on-arm64.yaml with: @@ -310,7 +313,7 @@ jobs: target-branch: ${{ inputs.target-branch }} run-k8s-tests-on-nvidia-gpu: - if: ${{ inputs.skip-test != 'yes' }} + if: false # msft-preview doesn't have these runners needs: publish-kata-deploy-payload-amd64 uses: 
./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml with: @@ -365,7 +368,7 @@ jobs: AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }} run-k8s-tests-on-ppc64le: - if: ${{ inputs.skip-test != 'yes' }} + if: false # msft-preview doesn't have these runners needs: publish-kata-deploy-payload-ppc64le uses: ./.github/workflows/run-k8s-tests-on-ppc64le.yaml with: diff --git a/.github/workflows/static-checks-self-hosted.yaml b/.github/workflows/static-checks-self-hosted.yaml index b654764c737e..a4db44251de1 100644 --- a/.github/workflows/static-checks-self-hosted.yaml +++ b/.github/workflows/static-checks-self-hosted.yaml @@ -23,7 +23,7 @@ jobs: build-checks: needs: skipper - if: ${{ needs.skipper.outputs.skip_static != 'yes' }} + if: false # msft-preview doesn't have these runners strategy: fail-fast: false matrix: diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 7c4bf9253311..0280dda2f6af 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -29,6 +29,9 @@ RUNTIME=${RUNTIME:-containerd-shim-kata-${KATA_HYPERVISOR}-v2} FACTORY_TEST=${FACTORY_TEST:-""} ARCH=$(uname -m) SANDBOXER=${SANDBOXER:-"podsandbox"} +# This is to avoid the following error in msft-preview: +# pod memory limit too low: minimum 128MiB, got 0Mib +POD_MEMORY_LIMIT_IN_BYTES=${POD_MEMORY_LIMIT_IN_BYTES:-268435456} # 256Mi containerd_runtime_type="io.containerd.kata-${KATA_HYPERVISOR}.v2" @@ -191,6 +194,9 @@ metadata: name: busybox-sandbox1 namespace: default uid: busybox-sandbox1-uid +linux: + resources: + memory_limit_in_bytes: ${POD_MEMORY_LIMIT_IN_BYTES} EOF #TestContainerSwap has created its own container_yaml. 
@@ -502,6 +508,9 @@ metadata: name: busybox-device-cgroup-sandbox namespace: default uid: busybox-device-cgroup-sandbox-uid +linux: + resources: + memory_limit_in_bytes: ${POD_MEMORY_LIMIT_IN_BYTES} EOF cat > "$container1_yaml" < Date: Wed, 18 Mar 2026 16:26:18 -0700 Subject: [PATCH 18/36] clh: Add VFIO device cold-plug support Enable VFIO device pass-through at VM creation time on Cloud Hypervisor, in addition to the existing hot-plug path. Signed-off-by: Roaa Sakr --- src/runtime/config/configuration-clh.toml.in | 6 ++ src/runtime/pkg/katautils/config.go | 8 +- src/runtime/pkg/katautils/create_test.go | 4 +- src/runtime/virtcontainers/clh.go | 40 +++++++++ src/runtime/virtcontainers/clh_test.go | 88 ++++++++++++++++++++ 5 files changed, 141 insertions(+), 5 deletions(-) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 3d3cb5ca0c2a..1e5ca9aa5b00 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -229,6 +229,12 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_CLH@ # The default setting is "no-port" hot_plug_vfio = "no-port" +# In a confidential compute environment hot-plugging can compromise +# security. +# Enable cold-plugging of VFIO devices to a root-port. +# The default setting is "no-port", which means disabled. +cold_plug_vfio = "no-port" + # Path to OCI hook binaries in the *guest rootfs*. # This does not affect host-side hooks which must instead be added to # the OCI spec passed to the runtime. 
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 63f182278529..91421f297695 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -1924,11 +1924,11 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT return nil } if hypervisorType == virtcontainers.ClhHypervisor { - if coldPlug != config.NoPort { - return fmt.Errorf("cold-plug not supported on CLH") + if coldPlug != config.NoPort && coldPlug != config.RootPort { + return fmt.Errorf("only cold-plug=%s or %s supported on CLH", config.NoPort, config.RootPort) } - if hotPlug != config.RootPort { - return fmt.Errorf("only hot-plug=%s supported on CLH", config.RootPort) + if hotPlug != config.NoPort && hotPlug != config.RootPort { + return fmt.Errorf("only hot-plug=%s or %s supported on CLH", config.NoPort, config.RootPort) } } diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go index 903e68d95dea..a0ec8ae07579 100644 --- a/src/runtime/pkg/katautils/create_test.go +++ b/src/runtime/pkg/katautils/create_test.go @@ -431,9 +431,11 @@ func TestVfioChecksClh(t *testing.T) { } assert.NoError(f(config.NoPort, config.NoPort)) assert.NoError(f(config.NoPort, config.RootPort)) + assert.NoError(f(config.RootPort, config.NoPort)) assert.Error(f(config.RootPort, config.RootPort)) - assert.Error(f(config.RootPort, config.NoPort)) assert.Error(f(config.NoPort, config.SwitchPort)) + assert.Error(f(config.SwitchPort, config.NoPort)) + assert.Error(f(config.BridgePort, config.NoPort)) } func TestVfioCheckQemu(t *testing.T) { diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 844004928ba0..5ad4a6bb0e2f 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -968,6 +968,44 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro return err } +// coldPlugVFIODevice appends a VFIO device 
to the VM configuration so that it +// is present when the VM is created (before boot). Cloud Hypervisor's CreateVM +// API accepts a list of devices that are attached at VM creation time, which +// effectively provides cold-plug semantics — the guest sees the device on its +// PCI bus from the very first enumeration. +func (clh *cloudHypervisor) coldPlugVFIODevice(device *config.VFIODev) error { + switch device.Type { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + // Supported PCI VFIO device types for Cloud Hypervisor. + default: + return fmt.Errorf("VFIO device %+v has unsupported type %v; only PCI VFIO devices are supported in Cloud Hypervisor", device, device.Type) + } + if strings.TrimSpace(device.SysfsDev) == "" { + return fmt.Errorf("VFIO device %q has empty or invalid SysfsDev path", device.ID) + } + + clh.Logger().WithFields(log.Fields{ + "device": device.ID, + "sysfs": device.SysfsDev, + "bdf": device.BDF, + }).Info("Cold-plugging VFIO device into VM config") + + clhDevice := *chclient.NewDeviceConfig(device.SysfsDev) + clhDevice.SetIommu(clh.config.IOMMU) + clhDevice.SetId(device.ID) + + if clh.vmconfig.Devices != nil { + *clh.vmconfig.Devices = append(*clh.vmconfig.Devices, clhDevice) + } else { + clh.vmconfig.Devices = &[]chclient.DeviceConfig{clhDevice} + } + + // Track the device ID so that it can be referenced later (e.g. for removal). 
+ clh.devicesIds[device.ID] = device.ID + + return nil +} + func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { cl := clh.client() ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) @@ -1334,6 +1372,8 @@ func (clh *cloudHypervisor) AddDevice(ctx context.Context, devInfo interface{}, clh.addVSock(defaultGuestVSockCID, v.UdsPath) case types.Volume: err = clh.addVolume(v) + case config.VFIODev: + err = clh.coldPlugVFIODevice(&v) default: clh.Logger().WithField("function", "AddDevice").Warnf("Add device of type %v is not supported.", v) return fmt.Errorf("Not implemented support for %s", v) diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 0a90982d32ed..5849ed76fbb2 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -682,6 +682,94 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) { assert.Error(err, "Hotplug remove pmem block device expected error") } +func TestCloudHypervisorColdPlugVFIODevice(t *testing.T) { + assert := assert.New(t) + + clhConfig, err := newClhConfig() + assert.NoError(err) + + clh := &cloudHypervisor{} + clh.config = clhConfig + clh.devicesIds = make(map[string]string) + clh.vmconfig = *chclient.NewVmConfig(*chclient.NewPayloadConfig()) + + // Cold-plug a PCI VFIO device + dev := &config.VFIODev{ + ID: "gpu0", + SysfsDev: "/sys/bus/pci/devices/0000:41:00.0", + BDF: "0000:41:00.0", + Type: config.VFIOPCIDeviceNormalType, + } + err = clh.coldPlugVFIODevice(dev) + assert.NoError(err, "Cold-plug PCI VFIO device expected no error") + + // Verify the device was added to vmconfig.Devices + assert.NotNil(clh.vmconfig.Devices) + assert.Len(*clh.vmconfig.Devices, 1) + assert.Equal("/sys/bus/pci/devices/0000:41:00.0", (*clh.vmconfig.Devices)[0].Path) + assert.Equal("gpu0", clh.devicesIds["gpu0"]) + + // Cold-plug a second device + dev2 := &config.VFIODev{ + ID: "gpu1", + SysfsDev: 
"/sys/bus/pci/devices/0000:42:00.0", + BDF: "0000:42:00.0", + Type: config.VFIOPCIDeviceNormalType, + } + err = clh.coldPlugVFIODevice(dev2) + assert.NoError(err, "Cold-plug second VFIO device expected no error") + assert.Len(*clh.vmconfig.Devices, 2) + + // AP mediated device should fail + apDev := &config.VFIODev{ + ID: "ap0", + Type: config.VFIOAPDeviceMediatedType, + } + err = clh.coldPlugVFIODevice(apDev) + assert.Error(err, "Cold-plug AP mediated device expected error") + + // Error type (0) should fail + errDev := &config.VFIODev{ + ID: "bad0", + SysfsDev: "/sys/bus/pci/devices/0000:43:00.0", + Type: config.VFIODeviceErrorType, + } + err = clh.coldPlugVFIODevice(errDev) + assert.Error(err, "Cold-plug error-type device expected error") + + // Empty SysfsDev should fail + emptySysfsDev := &config.VFIODev{ + ID: "bad1", + Type: config.VFIOPCIDeviceNormalType, + } + err = clh.coldPlugVFIODevice(emptySysfsDev) + assert.Error(err, "Cold-plug with empty SysfsDev expected error") +} + +func TestCloudHypervisorAddDeviceVFIO(t *testing.T) { + assert := assert.New(t) + + clhConfig, err := newClhConfig() + assert.NoError(err) + + clh := &cloudHypervisor{} + clh.config = clhConfig + clh.devicesIds = make(map[string]string) + clh.vmconfig = *chclient.NewVmConfig(*chclient.NewPayloadConfig()) + + // AddDevice with VFIODev type should cold-plug + dev := config.VFIODev{ + ID: "nic0", + SysfsDev: "/sys/bus/pci/devices/0000:05:00.0", + BDF: "0000:05:00.0", + Type: config.VFIOPCIDeviceNormalType, + } + err = clh.AddDevice(context.Background(), dev, VfioDev) + assert.NoError(err, "AddDevice VFIO expected no error") + assert.NotNil(clh.vmconfig.Devices) + assert.Len(*clh.vmconfig.Devices, 1) +} + func TestClhGenerateSocket(t *testing.T) { assert := assert.New(t) From e93d640fef612063ce46c677a7c120c3f9fd51a4 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Thu, 2 Apr 2026 11:23:26 -0700 Subject: [PATCH 19/36] runtime: regenerate CH client against v51.1 Regenerate CH client 
against v51.1 Signed-off-by: Saul Paredes --- .../client/.openapi-generator/FILES | 2 + .../pkg/cloud-hypervisor/client/README.md | 2 + .../cloud-hypervisor/client/api/openapi.yaml | 94 +++++++++++ .../cloud-hypervisor/client/api_default.go | 100 ++++++++++++ .../client/docs/CpusConfig.md | 26 +++ .../client/docs/DefaultApi.md | 63 ++++++++ .../client/docs/DiskConfig.md | 78 +++++++++ .../cloud-hypervisor/client/docs/NetConfig.md | 78 +++++++++ .../client/docs/NumaConfig.md | 26 +++ .../client/docs/VmResizeDisk.md | 82 ++++++++++ .../client/model_cpus_config.go | 40 +++++ .../client/model_disk_config.go | 116 ++++++++++++++ .../client/model_net_config.go | 120 ++++++++++++++ .../client/model_numa_config.go | 36 +++++ .../client/model_vm_resize_disk.go | 151 ++++++++++++++++++ .../cloud-hypervisor/cloud-hypervisor.yaml | 51 ++++++ versions.yaml | 2 +- 17 files changed, 1066 insertions(+), 1 deletion(-) create mode 100644 src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/VmResizeDisk.md create mode 100644 src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_vm_resize_disk.go diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/.openapi-generator/FILES b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/.openapi-generator/FILES index 9bde9a8fe3a2..d9a2dc51d91f 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/.openapi-generator/FILES +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/.openapi-generator/FILES @@ -45,6 +45,7 @@ docs/VmCoredumpData.md docs/VmInfo.md docs/VmRemoveDevice.md docs/VmResize.md +docs/VmResizeDisk.md docs/VmResizeZone.md docs/VmSnapshotConfig.md docs/VmmPingResponse.md @@ -90,6 +91,7 @@ model_vm_coredump_data.go model_vm_info.go model_vm_remove_device.go model_vm_resize.go +model_vm_resize_disk.go model_vm_resize_zone.go model_vm_snapshot_config.go model_vmm_ping_response.go diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md 
b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md index 20bdbf5c1558..dd57505ad4f1 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/README.md @@ -100,6 +100,7 @@ Class | Method | HTTP request | Description *DefaultApi* | [**VmInfoGet**](docs/DefaultApi.md#vminfoget) | **Get** /vm.info | Returns general information about the cloud-hypervisor Virtual Machine (VM) instance. *DefaultApi* | [**VmReceiveMigrationPut**](docs/DefaultApi.md#vmreceivemigrationput) | **Put** /vm.receive-migration | Receive a VM migration from URL *DefaultApi* | [**VmRemoveDevicePut**](docs/DefaultApi.md#vmremovedeviceput) | **Put** /vm.remove-device | Remove a device from the VM +*DefaultApi* | [**VmResizeDiskPut**](docs/DefaultApi.md#vmresizediskput) | **Put** /vm.resize-disk | Resize a disk *DefaultApi* | [**VmResizePut**](docs/DefaultApi.md#vmresizeput) | **Put** /vm.resize | Resize the VM *DefaultApi* | [**VmResizeZonePut**](docs/DefaultApi.md#vmresizezoneput) | **Put** /vm.resize-zone | Resize a memory zone *DefaultApi* | [**VmRestorePut**](docs/DefaultApi.md#vmrestoreput) | **Put** /vm.restore | Restore a VM from a snapshot. 
@@ -149,6 +150,7 @@ Class | Method | HTTP request | Description - [VmInfo](docs/VmInfo.md) - [VmRemoveDevice](docs/VmRemoveDevice.md) - [VmResize](docs/VmResize.md) + - [VmResizeDisk](docs/VmResizeDisk.md) - [VmResizeZone](docs/VmResizeZone.md) - [VmSnapshotConfig](docs/VmSnapshotConfig.md) - [VmmPingResponse](docs/VmmPingResponse.md) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml index 02bf62e935d8..66dd2514cb1c 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml @@ -153,6 +153,21 @@ paths: description: The VM instance could not be resized because a cpu removal is still pending. summary: Resize the VM + /vm.resize-disk: + put: + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/VmResizeDisk' + description: Resizes a disk attached to the VM + required: true + responses: + "204": + description: The disk was successfully resized. + "500": + description: The disk could not be resized. 
+ summary: Resize a disk /vm.resize-zone: put: requestBody: @@ -511,6 +526,7 @@ components: rate_limit_group: rate_limit_group queue_size: 6 direct: false + backing_files: false rate_limiter_config: ops: size: 0 @@ -535,12 +551,15 @@ components: iommu: false vhost_socket: vhost_socket serial: serial + sparse: true vhost_user: false id: id + image_type: FixedVhd - num_queues: 9 rate_limit_group: rate_limit_group queue_size: 6 direct: false + backing_files: false rate_limiter_config: ops: size: 0 @@ -565,8 +584,10 @@ components: iommu: false vhost_socket: vhost_socket serial: serial + sparse: true vhost_user: false id: id + image_type: FixedVhd fs: - pci_segment: 6 num_queues: 6 @@ -649,7 +670,9 @@ components: - tap: tap host_mac: host_mac num_queues: 6 + offload_ufo: true queue_size: 1 + offload_csum: true ip: 192.168.249.1 rate_limiter_config: ops: @@ -663,6 +686,7 @@ components: mac: mac mtu: 3 pci_segment: 2 + offload_tso: true vhost_mode: Client iommu: false vhost_socket: vhost_socket @@ -672,7 +696,9 @@ components: - tap: tap host_mac: host_mac num_queues: 6 + offload_ufo: true queue_size: 1 + offload_csum: true ip: 192.168.249.1 rate_limiter_config: ops: @@ -686,6 +712,7 @@ components: mac: mac mtu: 3 pci_segment: 2 + offload_tso: true vhost_mode: Client iommu: false vhost_socket: vhost_socket @@ -714,6 +741,7 @@ components: max_phys_bits: 7 boot_vcpus: 1 max_vcpus: 1 + nested: true affinity: - vcpu: 9 host_cpus: @@ -754,6 +782,7 @@ components: pci_segments: - 5 - 5 + device_id: device_id cpus: - 3 - 3 @@ -769,6 +798,7 @@ components: pci_segments: - 5 - 5 + device_id: device_id cpus: - 3 - 3 @@ -941,6 +971,7 @@ components: rate_limit_group: rate_limit_group queue_size: 6 direct: false + backing_files: false rate_limiter_config: ops: size: 0 @@ -965,12 +996,15 @@ components: iommu: false vhost_socket: vhost_socket serial: serial + sparse: true vhost_user: false id: id + image_type: FixedVhd - num_queues: 9 rate_limit_group: rate_limit_group queue_size: 6 
direct: false + backing_files: false rate_limiter_config: ops: size: 0 @@ -995,8 +1029,10 @@ components: iommu: false vhost_socket: vhost_socket serial: serial + sparse: true vhost_user: false id: id + image_type: FixedVhd fs: - pci_segment: 6 num_queues: 6 @@ -1079,7 +1115,9 @@ components: - tap: tap host_mac: host_mac num_queues: 6 + offload_ufo: true queue_size: 1 + offload_csum: true ip: 192.168.249.1 rate_limiter_config: ops: @@ -1093,6 +1131,7 @@ components: mac: mac mtu: 3 pci_segment: 2 + offload_tso: true vhost_mode: Client iommu: false vhost_socket: vhost_socket @@ -1102,7 +1141,9 @@ components: - tap: tap host_mac: host_mac num_queues: 6 + offload_ufo: true queue_size: 1 + offload_csum: true ip: 192.168.249.1 rate_limiter_config: ops: @@ -1116,6 +1157,7 @@ components: mac: mac mtu: 3 pci_segment: 2 + offload_tso: true vhost_mode: Client iommu: false vhost_socket: vhost_socket @@ -1144,6 +1186,7 @@ components: max_phys_bits: 7 boot_vcpus: 1 max_vcpus: 1 + nested: true affinity: - vcpu: 9 host_cpus: @@ -1184,6 +1227,7 @@ components: pci_segments: - 5 - 5 + device_id: device_id cpus: - 3 - 3 @@ -1199,6 +1243,7 @@ components: pci_segments: - 5 - 5 + device_id: device_id cpus: - 3 - 3 @@ -1361,6 +1406,7 @@ components: max_phys_bits: 7 boot_vcpus: 1 max_vcpus: 1 + nested: true affinity: - vcpu: 9 host_cpus: @@ -1384,6 +1430,9 @@ components: type: boolean max_phys_bits: type: integer + nested: + default: true + type: boolean affinity: items: $ref: '#/components/schemas/CpuAffinity' @@ -1671,6 +1720,7 @@ components: rate_limit_group: rate_limit_group queue_size: 6 direct: false + backing_files: false rate_limiter_config: ops: size: 0 @@ -1695,8 +1745,10 @@ components: iommu: false vhost_socket: vhost_socket serial: serial + sparse: true vhost_user: false id: id + image_type: FixedVhd properties: path: type: string @@ -1735,13 +1787,29 @@ components: items: $ref: '#/components/schemas/VirtQueueAffinity' type: array + backing_files: + default: false + type: 
boolean + sparse: + default: true + type: boolean + image_type: + enum: + - FixedVhd + - Qcow2 + - Raw + - Vhdx + - Unknown + type: string type: object NetConfig: example: tap: tap host_mac: host_mac num_queues: 6 + offload_ufo: true queue_size: 1 + offload_csum: true ip: 192.168.249.1 rate_limiter_config: ops: @@ -1755,6 +1823,7 @@ components: mac: mac mtu: 3 pci_segment: 2 + offload_tso: true vhost_mode: Client iommu: false vhost_socket: vhost_socket @@ -1803,6 +1872,15 @@ components: type: integer rate_limiter_config: $ref: '#/components/schemas/RateLimiterConfig' + offload_tso: + default: true + type: boolean + offload_ufo: + default: true + type: boolean + offload_csum: + default: true + type: boolean type: object RngConfig: example: @@ -2053,6 +2131,7 @@ components: pci_segments: - 5 - 5 + device_id: device_id cpus: - 3 - 3 @@ -2082,6 +2161,8 @@ components: format: int32 type: integer type: array + device_id: + type: string required: - guest_numa_id type: object @@ -2103,6 +2184,19 @@ components: format: int64 type: integer type: object + VmResizeDisk: + example: + desired_size: 0 + id: id + properties: + id: + description: disk identifier + type: string + desired_size: + description: desired disk size in bytes + format: int64 + type: integer + type: object VmResizeZone: example: id: id diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api_default.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api_default.go index 4379918ea688..5f29081f006a 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api_default.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api_default.go @@ -2226,6 +2226,106 @@ func (a *DefaultApiService) VmRemoveDevicePutExecute(r ApiVmRemoveDevicePutReque return localVarHTTPResponse, nil } +type ApiVmResizeDiskPutRequest struct { + ctx _context.Context + ApiService *DefaultApiService + vmResizeDisk *VmResizeDisk +} + +// Resizes a disk attached to the VM +func (r 
ApiVmResizeDiskPutRequest) VmResizeDisk(vmResizeDisk VmResizeDisk) ApiVmResizeDiskPutRequest { + r.vmResizeDisk = &vmResizeDisk + return r +} + +func (r ApiVmResizeDiskPutRequest) Execute() (*_nethttp.Response, error) { + return r.ApiService.VmResizeDiskPutExecute(r) +} + +/* +VmResizeDiskPut Resize a disk + + @param ctx _context.Context - for authentication, logging, cancellation, deadlines, tracing, etc. Passed from http.Request or context.Background(). + @return ApiVmResizeDiskPutRequest +*/ +func (a *DefaultApiService) VmResizeDiskPut(ctx _context.Context) ApiVmResizeDiskPutRequest { + return ApiVmResizeDiskPutRequest{ + ApiService: a, + ctx: ctx, + } +} + +// Execute executes the request +func (a *DefaultApiService) VmResizeDiskPutExecute(r ApiVmResizeDiskPutRequest) (*_nethttp.Response, error) { + var ( + localVarHTTPMethod = _nethttp.MethodPut + localVarPostBody interface{} + localVarFormFileName string + localVarFileName string + localVarFileBytes []byte + ) + + localBasePath, err := a.client.cfg.ServerURLWithContext(r.ctx, "DefaultApiService.VmResizeDiskPut") + if err != nil { + return nil, GenericOpenAPIError{error: err.Error()} + } + + localVarPath := localBasePath + "/vm.resize-disk" + + localVarHeaderParams := make(map[string]string) + localVarQueryParams := _neturl.Values{} + localVarFormParams := _neturl.Values{} + if r.vmResizeDisk == nil { + return nil, reportError("vmResizeDisk is required and must be specified") + } + + // to determine the Content-Type header + localVarHTTPContentTypes := []string{"application/json"} + + // set Content-Type header + localVarHTTPContentType := selectHeaderContentType(localVarHTTPContentTypes) + if localVarHTTPContentType != "" { + localVarHeaderParams["Content-Type"] = localVarHTTPContentType + } + + // to determine the Accept header + localVarHTTPHeaderAccepts := []string{} + + // set Accept header + localVarHTTPHeaderAccept := selectHeaderAccept(localVarHTTPHeaderAccepts) + if localVarHTTPHeaderAccept != "" { + 
localVarHeaderParams["Accept"] = localVarHTTPHeaderAccept + } + // body params + localVarPostBody = r.vmResizeDisk + req, err := a.client.prepareRequest(r.ctx, localVarPath, localVarHTTPMethod, localVarPostBody, localVarHeaderParams, localVarQueryParams, localVarFormParams, localVarFormFileName, localVarFileName, localVarFileBytes) + if err != nil { + return nil, err + } + + localVarHTTPResponse, err := a.client.callAPI(req) + if err != nil || localVarHTTPResponse == nil { + return localVarHTTPResponse, err + } + + localVarBody, err := _ioutil.ReadAll(localVarHTTPResponse.Body) + localVarHTTPResponse.Body.Close() + localVarHTTPResponse.Body = _ioutil.NopCloser(bytes.NewBuffer(localVarBody)) + if err != nil { + return localVarHTTPResponse, err + } + + if localVarHTTPResponse.StatusCode >= 300 { + newErr := GenericOpenAPIError{ + body: localVarBody, + error: localVarHTTPResponse.Status, + } + return localVarHTTPResponse, newErr + } + + return localVarHTTPResponse, nil +} + type ApiVmResizePutRequest struct { ctx _context.Context ApiService *DefaultApiService diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/CpusConfig.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/CpusConfig.md index 0b81feebfe5d..8ebcf9240901 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/CpusConfig.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/CpusConfig.md @@ -9,6 +9,7 @@ Name | Type | Description | Notes **Topology** | Pointer to [**CpuTopology**](CpuTopology.md) | | [optional] **KvmHyperv** | Pointer to **bool** | | [optional] [default to false] **MaxPhysBits** | Pointer to **int32** | | [optional] +**Nested** | Pointer to **bool** | | [optional] [default to true] **Affinity** | Pointer to [**[]CpuAffinity**](CpuAffinity.md) | | [optional] **Features** | Pointer to [**CpuFeatures**](CpuFeatures.md) | | [optional] @@ -146,6 +147,31 @@ SetMaxPhysBits sets MaxPhysBits field to given value. 
HasMaxPhysBits returns a boolean if a field has been set. +### GetNested + +`func (o *CpusConfig) GetNested() bool` + +GetNested returns the Nested field if non-nil, zero value otherwise. + +### GetNestedOk + +`func (o *CpusConfig) GetNestedOk() (*bool, bool)` + +GetNestedOk returns a tuple with the Nested field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetNested + +`func (o *CpusConfig) SetNested(v bool)` + +SetNested sets Nested field to given value. + +### HasNested + +`func (o *CpusConfig) HasNested() bool` + +HasNested returns a boolean if a field has been set. + ### GetAffinity `func (o *CpusConfig) GetAffinity() []CpuAffinity` diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md index e4cac03fdc15..9b24ac3a4f01 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md @@ -26,6 +26,7 @@ Method | HTTP request | Description [**VmInfoGet**](DefaultApi.md#VmInfoGet) | **Get** /vm.info | Returns general information about the cloud-hypervisor Virtual Machine (VM) instance. [**VmReceiveMigrationPut**](DefaultApi.md#VmReceiveMigrationPut) | **Put** /vm.receive-migration | Receive a VM migration from URL [**VmRemoveDevicePut**](DefaultApi.md#VmRemoveDevicePut) | **Put** /vm.remove-device | Remove a device from the VM +[**VmResizeDiskPut**](DefaultApi.md#VmResizeDiskPut) | **Put** /vm.resize-disk | Resize a disk [**VmResizePut**](DefaultApi.md#VmResizePut) | **Put** /vm.resize | Resize the VM [**VmResizeZonePut**](DefaultApi.md#VmResizeZonePut) | **Put** /vm.resize-zone | Resize a memory zone [**VmRestorePut**](DefaultApi.md#VmRestorePut) | **Put** /vm.restore | Restore a VM from a snapshot. 
@@ -1370,6 +1371,68 @@ No authorization required [[Back to README]](../README.md) +## VmResizeDiskPut + +> VmResizeDiskPut(ctx).VmResizeDisk(vmResizeDisk).Execute() + +Resize a disk + +### Example + +```go +package main + +import ( + "context" + "fmt" + "os" + openapiclient "./openapi" +) + +func main() { + vmResizeDisk := *openapiclient.NewVmResizeDisk() // VmResizeDisk | Resizes a disk attached to the VM + + configuration := openapiclient.NewConfiguration() + api_client := openapiclient.NewAPIClient(configuration) + resp, r, err := api_client.DefaultApi.VmResizeDiskPut(context.Background()).VmResizeDisk(vmResizeDisk).Execute() + if err != nil { + fmt.Fprintf(os.Stderr, "Error when calling `DefaultApi.VmResizeDiskPut``: %v\n", err) + fmt.Fprintf(os.Stderr, "Full HTTP response: %v\n", r) + } +} +``` + +### Path Parameters + + + +### Other Parameters + +Other parameters are passed through a pointer to a apiVmResizeDiskPutRequest struct via the builder pattern + + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **vmResizeDisk** | [**VmResizeDisk**](VmResizeDisk.md) | Resizes a disk attached to the VM | + +### Return type + + (empty response body) + +### Authorization + +No authorization required + +### HTTP request headers + +- **Content-Type**: application/json +- **Accept**: Not defined + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) +[[Back to Model list]](../README.md#documentation-for-models) +[[Back to README]](../README.md) + + ## VmResizePut > VmResizePut(ctx).VmResize(vmResize).Execute() diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DiskConfig.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DiskConfig.md index c50d4b35d097..16cc217f2022 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DiskConfig.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DiskConfig.md @@ -18,6 +18,9 @@ Name 
| Type | Description | Notes **Serial** | Pointer to **string** | | [optional] **RateLimitGroup** | Pointer to **string** | | [optional] **QueueAffinity** | Pointer to [**[]VirtQueueAffinity**](VirtQueueAffinity.md) | | [optional] +**BackingFiles** | Pointer to **bool** | | [optional] [default to false] +**Sparse** | Pointer to **bool** | | [optional] [default to true] +**ImageType** | Pointer to **string** | | [optional] ## Methods @@ -388,6 +391,81 @@ SetQueueAffinity sets QueueAffinity field to given value. HasQueueAffinity returns a boolean if a field has been set. +### GetBackingFiles + +`func (o *DiskConfig) GetBackingFiles() bool` + +GetBackingFiles returns the BackingFiles field if non-nil, zero value otherwise. + +### GetBackingFilesOk + +`func (o *DiskConfig) GetBackingFilesOk() (*bool, bool)` + +GetBackingFilesOk returns a tuple with the BackingFiles field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetBackingFiles + +`func (o *DiskConfig) SetBackingFiles(v bool)` + +SetBackingFiles sets BackingFiles field to given value. + +### HasBackingFiles + +`func (o *DiskConfig) HasBackingFiles() bool` + +HasBackingFiles returns a boolean if a field has been set. + +### GetSparse + +`func (o *DiskConfig) GetSparse() bool` + +GetSparse returns the Sparse field if non-nil, zero value otherwise. + +### GetSparseOk + +`func (o *DiskConfig) GetSparseOk() (*bool, bool)` + +GetSparseOk returns a tuple with the Sparse field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetSparse + +`func (o *DiskConfig) SetSparse(v bool)` + +SetSparse sets Sparse field to given value. + +### HasSparse + +`func (o *DiskConfig) HasSparse() bool` + +HasSparse returns a boolean if a field has been set. + +### GetImageType + +`func (o *DiskConfig) GetImageType() string` + +GetImageType returns the ImageType field if non-nil, zero value otherwise. 
+ +### GetImageTypeOk + +`func (o *DiskConfig) GetImageTypeOk() (*string, bool)` + +GetImageTypeOk returns a tuple with the ImageType field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetImageType + +`func (o *DiskConfig) SetImageType(v string)` + +SetImageType sets ImageType field to given value. + +### HasImageType + +`func (o *DiskConfig) HasImageType() bool` + +HasImageType returns a boolean if a field has been set. + [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NetConfig.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NetConfig.md index 78ae659c6cea..f0e505682d4d 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NetConfig.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NetConfig.md @@ -19,6 +19,9 @@ Name | Type | Description | Notes **Id** | Pointer to **string** | | [optional] **PciSegment** | Pointer to **int32** | | [optional] **RateLimiterConfig** | Pointer to [**RateLimiterConfig**](RateLimiterConfig.md) | | [optional] +**OffloadTso** | Pointer to **bool** | | [optional] [default to true] +**OffloadUfo** | Pointer to **bool** | | [optional] [default to true] +**OffloadCsum** | Pointer to **bool** | | [optional] [default to true] ## Methods @@ -414,6 +417,81 @@ SetRateLimiterConfig sets RateLimiterConfig field to given value. HasRateLimiterConfig returns a boolean if a field has been set. +### GetOffloadTso + +`func (o *NetConfig) GetOffloadTso() bool` + +GetOffloadTso returns the OffloadTso field if non-nil, zero value otherwise. + +### GetOffloadTsoOk + +`func (o *NetConfig) GetOffloadTsoOk() (*bool, bool)` + +GetOffloadTsoOk returns a tuple with the OffloadTso field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. 
+ +### SetOffloadTso + +`func (o *NetConfig) SetOffloadTso(v bool)` + +SetOffloadTso sets OffloadTso field to given value. + +### HasOffloadTso + +`func (o *NetConfig) HasOffloadTso() bool` + +HasOffloadTso returns a boolean if a field has been set. + +### GetOffloadUfo + +`func (o *NetConfig) GetOffloadUfo() bool` + +GetOffloadUfo returns the OffloadUfo field if non-nil, zero value otherwise. + +### GetOffloadUfoOk + +`func (o *NetConfig) GetOffloadUfoOk() (*bool, bool)` + +GetOffloadUfoOk returns a tuple with the OffloadUfo field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetOffloadUfo + +`func (o *NetConfig) SetOffloadUfo(v bool)` + +SetOffloadUfo sets OffloadUfo field to given value. + +### HasOffloadUfo + +`func (o *NetConfig) HasOffloadUfo() bool` + +HasOffloadUfo returns a boolean if a field has been set. + +### GetOffloadCsum + +`func (o *NetConfig) GetOffloadCsum() bool` + +GetOffloadCsum returns the OffloadCsum field if non-nil, zero value otherwise. + +### GetOffloadCsumOk + +`func (o *NetConfig) GetOffloadCsumOk() (*bool, bool)` + +GetOffloadCsumOk returns a tuple with the OffloadCsum field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetOffloadCsum + +`func (o *NetConfig) SetOffloadCsum(v bool)` + +SetOffloadCsum sets OffloadCsum field to given value. + +### HasOffloadCsum + +`func (o *NetConfig) HasOffloadCsum() bool` + +HasOffloadCsum returns a boolean if a field has been set. 
+ [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NumaConfig.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NumaConfig.md index cc724130cd67..a95bde36e61e 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NumaConfig.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/NumaConfig.md @@ -9,6 +9,7 @@ Name | Type | Description | Notes **Distances** | Pointer to [**[]NumaDistance**](NumaDistance.md) | | [optional] **MemoryZones** | Pointer to **[]string** | | [optional] **PciSegments** | Pointer to **[]int32** | | [optional] +**DeviceId** | Pointer to **string** | | [optional] ## Methods @@ -149,6 +150,31 @@ SetPciSegments sets PciSegments field to given value. HasPciSegments returns a boolean if a field has been set. +### GetDeviceId + +`func (o *NumaConfig) GetDeviceId() string` + +GetDeviceId returns the DeviceId field if non-nil, zero value otherwise. + +### GetDeviceIdOk + +`func (o *NumaConfig) GetDeviceIdOk() (*string, bool)` + +GetDeviceIdOk returns a tuple with the DeviceId field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetDeviceId + +`func (o *NumaConfig) SetDeviceId(v string)` + +SetDeviceId sets DeviceId field to given value. + +### HasDeviceId + +`func (o *NumaConfig) HasDeviceId() bool` + +HasDeviceId returns a boolean if a field has been set. 
+ [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/VmResizeDisk.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/VmResizeDisk.md new file mode 100644 index 000000000000..61fb8b7d8810 --- /dev/null +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/VmResizeDisk.md @@ -0,0 +1,82 @@ +# VmResizeDisk + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**Id** | Pointer to **string** | disk identifier | [optional] +**DesiredSize** | Pointer to **int64** | desired disk size in bytes | [optional] + +## Methods + +### NewVmResizeDisk + +`func NewVmResizeDisk() *VmResizeDisk` + +NewVmResizeDisk instantiates a new VmResizeDisk object +This constructor will assign default values to properties that have it defined, +and makes sure properties required by API are set, but the set of arguments +will change when the set of required properties is changed + +### NewVmResizeDiskWithDefaults + +`func NewVmResizeDiskWithDefaults() *VmResizeDisk` + +NewVmResizeDiskWithDefaults instantiates a new VmResizeDisk object +This constructor will only assign default values to properties that have it defined, +but it doesn't guarantee that properties required by API are set + +### GetId + +`func (o *VmResizeDisk) GetId() string` + +GetId returns the Id field if non-nil, zero value otherwise. + +### GetIdOk + +`func (o *VmResizeDisk) GetIdOk() (*string, bool)` + +GetIdOk returns a tuple with the Id field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetId + +`func (o *VmResizeDisk) SetId(v string)` + +SetId sets Id field to given value. + +### HasId + +`func (o *VmResizeDisk) HasId() bool` + +HasId returns a boolean if a field has been set. 
+ +### GetDesiredSize + +`func (o *VmResizeDisk) GetDesiredSize() int64` + +GetDesiredSize returns the DesiredSize field if non-nil, zero value otherwise. + +### GetDesiredSizeOk + +`func (o *VmResizeDisk) GetDesiredSizeOk() (*int64, bool)` + +GetDesiredSizeOk returns a tuple with the DesiredSize field if it's non-nil, zero value otherwise +and a boolean to check if the value has been set. + +### SetDesiredSize + +`func (o *VmResizeDisk) SetDesiredSize(v int64)` + +SetDesiredSize sets DesiredSize field to given value. + +### HasDesiredSize + +`func (o *VmResizeDisk) HasDesiredSize() bool` + +HasDesiredSize returns a boolean if a field has been set. + + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_cpus_config.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_cpus_config.go index 33dd65cf83b1..058bb7d5ef37 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_cpus_config.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_cpus_config.go @@ -21,6 +21,7 @@ type CpusConfig struct { Topology *CpuTopology `json:"topology,omitempty"` KvmHyperv *bool `json:"kvm_hyperv,omitempty"` MaxPhysBits *int32 `json:"max_phys_bits,omitempty"` + Nested *bool `json:"nested,omitempty"` Affinity *[]CpuAffinity `json:"affinity,omitempty"` Features *CpuFeatures `json:"features,omitempty"` } @@ -35,6 +36,8 @@ func NewCpusConfig(bootVcpus int32, maxVcpus int32) *CpusConfig { this.MaxVcpus = maxVcpus var kvmHyperv bool = false this.KvmHyperv = &kvmHyperv + var nested bool = true + this.Nested = &nested return &this } @@ -45,6 +48,8 @@ func NewCpusConfigWithDefaults() *CpusConfig { this := CpusConfig{} var kvmHyperv bool = false this.KvmHyperv = &kvmHyperv + var nested bool = true + this.Nested = &nested return &this } @@ -192,6 +197,38 @@ func (o 
*CpusConfig) SetMaxPhysBits(v int32) { o.MaxPhysBits = &v } +// GetNested returns the Nested field value if set, zero value otherwise. +func (o *CpusConfig) GetNested() bool { + if o == nil || o.Nested == nil { + var ret bool + return ret + } + return *o.Nested +} + +// GetNestedOk returns a tuple with the Nested field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *CpusConfig) GetNestedOk() (*bool, bool) { + if o == nil || o.Nested == nil { + return nil, false + } + return o.Nested, true +} + +// HasNested returns a boolean if a field has been set. +func (o *CpusConfig) HasNested() bool { + if o != nil && o.Nested != nil { + return true + } + + return false +} + +// SetNested gets a reference to the given bool and assigns it to the Nested field. +func (o *CpusConfig) SetNested(v bool) { + o.Nested = &v +} + // GetAffinity returns the Affinity field value if set, zero value otherwise. func (o *CpusConfig) GetAffinity() []CpuAffinity { if o == nil || o.Affinity == nil { @@ -273,6 +310,9 @@ func (o CpusConfig) MarshalJSON() ([]byte, error) { if o.MaxPhysBits != nil { toSerialize["max_phys_bits"] = o.MaxPhysBits } + if o.Nested != nil { + toSerialize["nested"] = o.Nested + } if o.Affinity != nil { toSerialize["affinity"] = o.Affinity } diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_disk_config.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_disk_config.go index f898c94fd3e3..9c2357a6d245 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_disk_config.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_disk_config.go @@ -30,6 +30,9 @@ type DiskConfig struct { Serial *string `json:"serial,omitempty"` RateLimitGroup *string `json:"rate_limit_group,omitempty"` QueueAffinity *[]VirtQueueAffinity `json:"queue_affinity,omitempty"` + BackingFiles *bool `json:"backing_files,omitempty"` + Sparse *bool `json:"sparse,omitempty"` + ImageType *string 
`json:"image_type,omitempty"` } // NewDiskConfig instantiates a new DiskConfig object @@ -50,6 +53,10 @@ func NewDiskConfig() *DiskConfig { this.QueueSize = &queueSize var vhostUser bool = false this.VhostUser = &vhostUser + var backingFiles bool = false + this.BackingFiles = &backingFiles + var sparse bool = true + this.Sparse = &sparse return &this } @@ -70,6 +77,10 @@ func NewDiskConfigWithDefaults() *DiskConfig { this.QueueSize = &queueSize var vhostUser bool = false this.VhostUser = &vhostUser + var backingFiles bool = false + this.BackingFiles = &backingFiles + var sparse bool = true + this.Sparse = &sparse return &this } @@ -521,6 +532,102 @@ func (o *DiskConfig) SetQueueAffinity(v []VirtQueueAffinity) { o.QueueAffinity = &v } +// GetBackingFiles returns the BackingFiles field value if set, zero value otherwise. +func (o *DiskConfig) GetBackingFiles() bool { + if o == nil || o.BackingFiles == nil { + var ret bool + return ret + } + return *o.BackingFiles +} + +// GetBackingFilesOk returns a tuple with the BackingFiles field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *DiskConfig) GetBackingFilesOk() (*bool, bool) { + if o == nil || o.BackingFiles == nil { + return nil, false + } + return o.BackingFiles, true +} + +// HasBackingFiles returns a boolean if a field has been set. +func (o *DiskConfig) HasBackingFiles() bool { + if o != nil && o.BackingFiles != nil { + return true + } + + return false +} + +// SetBackingFiles gets a reference to the given bool and assigns it to the BackingFiles field. +func (o *DiskConfig) SetBackingFiles(v bool) { + o.BackingFiles = &v +} + +// GetSparse returns the Sparse field value if set, zero value otherwise. +func (o *DiskConfig) GetSparse() bool { + if o == nil || o.Sparse == nil { + var ret bool + return ret + } + return *o.Sparse +} + +// GetSparseOk returns a tuple with the Sparse field value if set, nil otherwise +// and a boolean to check if the value has been set. 
+func (o *DiskConfig) GetSparseOk() (*bool, bool) { + if o == nil || o.Sparse == nil { + return nil, false + } + return o.Sparse, true +} + +// HasSparse returns a boolean if a field has been set. +func (o *DiskConfig) HasSparse() bool { + if o != nil && o.Sparse != nil { + return true + } + + return false +} + +// SetSparse gets a reference to the given bool and assigns it to the Sparse field. +func (o *DiskConfig) SetSparse(v bool) { + o.Sparse = &v +} + +// GetImageType returns the ImageType field value if set, zero value otherwise. +func (o *DiskConfig) GetImageType() string { + if o == nil || o.ImageType == nil { + var ret string + return ret + } + return *o.ImageType +} + +// GetImageTypeOk returns a tuple with the ImageType field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *DiskConfig) GetImageTypeOk() (*string, bool) { + if o == nil || o.ImageType == nil { + return nil, false + } + return o.ImageType, true +} + +// HasImageType returns a boolean if a field has been set. +func (o *DiskConfig) HasImageType() bool { + if o != nil && o.ImageType != nil { + return true + } + + return false +} + +// SetImageType gets a reference to the given string and assigns it to the ImageType field. 
+func (o *DiskConfig) SetImageType(v string) { + o.ImageType = &v +} + func (o DiskConfig) MarshalJSON() ([]byte, error) { toSerialize := map[string]interface{}{} if o.Path != nil { @@ -565,6 +672,15 @@ func (o DiskConfig) MarshalJSON() ([]byte, error) { if o.QueueAffinity != nil { toSerialize["queue_affinity"] = o.QueueAffinity } + if o.BackingFiles != nil { + toSerialize["backing_files"] = o.BackingFiles + } + if o.Sparse != nil { + toSerialize["sparse"] = o.Sparse + } + if o.ImageType != nil { + toSerialize["image_type"] = o.ImageType + } return json.Marshal(toSerialize) } diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_net_config.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_net_config.go index 330206757911..392506531510 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_net_config.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_net_config.go @@ -33,6 +33,9 @@ type NetConfig struct { Id *string `json:"id,omitempty"` PciSegment *int32 `json:"pci_segment,omitempty"` RateLimiterConfig *RateLimiterConfig `json:"rate_limiter_config,omitempty"` + OffloadTso *bool `json:"offload_tso,omitempty"` + OffloadUfo *bool `json:"offload_ufo,omitempty"` + OffloadCsum *bool `json:"offload_csum,omitempty"` } // NewNetConfig instantiates a new NetConfig object @@ -55,6 +58,12 @@ func NewNetConfig() *NetConfig { this.VhostUser = &vhostUser var vhostMode string = "Client" this.VhostMode = &vhostMode + var offloadTso bool = true + this.OffloadTso = &offloadTso + var offloadUfo bool = true + this.OffloadUfo = &offloadUfo + var offloadCsum bool = true + this.OffloadCsum = &offloadCsum return &this } @@ -77,6 +86,12 @@ func NewNetConfigWithDefaults() *NetConfig { this.VhostUser = &vhostUser var vhostMode string = "Client" this.VhostMode = &vhostMode + var offloadTso bool = true + this.OffloadTso = &offloadTso + var offloadUfo bool = true + this.OffloadUfo = &offloadUfo + var offloadCsum bool = 
true + this.OffloadCsum = &offloadCsum return &this } @@ -560,6 +575,102 @@ func (o *NetConfig) SetRateLimiterConfig(v RateLimiterConfig) { o.RateLimiterConfig = &v } +// GetOffloadTso returns the OffloadTso field value if set, zero value otherwise. +func (o *NetConfig) GetOffloadTso() bool { + if o == nil || o.OffloadTso == nil { + var ret bool + return ret + } + return *o.OffloadTso +} + +// GetOffloadTsoOk returns a tuple with the OffloadTso field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *NetConfig) GetOffloadTsoOk() (*bool, bool) { + if o == nil || o.OffloadTso == nil { + return nil, false + } + return o.OffloadTso, true +} + +// HasOffloadTso returns a boolean if a field has been set. +func (o *NetConfig) HasOffloadTso() bool { + if o != nil && o.OffloadTso != nil { + return true + } + + return false +} + +// SetOffloadTso gets a reference to the given bool and assigns it to the OffloadTso field. +func (o *NetConfig) SetOffloadTso(v bool) { + o.OffloadTso = &v +} + +// GetOffloadUfo returns the OffloadUfo field value if set, zero value otherwise. +func (o *NetConfig) GetOffloadUfo() bool { + if o == nil || o.OffloadUfo == nil { + var ret bool + return ret + } + return *o.OffloadUfo +} + +// GetOffloadUfoOk returns a tuple with the OffloadUfo field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *NetConfig) GetOffloadUfoOk() (*bool, bool) { + if o == nil || o.OffloadUfo == nil { + return nil, false + } + return o.OffloadUfo, true +} + +// HasOffloadUfo returns a boolean if a field has been set. +func (o *NetConfig) HasOffloadUfo() bool { + if o != nil && o.OffloadUfo != nil { + return true + } + + return false +} + +// SetOffloadUfo gets a reference to the given bool and assigns it to the OffloadUfo field. +func (o *NetConfig) SetOffloadUfo(v bool) { + o.OffloadUfo = &v +} + +// GetOffloadCsum returns the OffloadCsum field value if set, zero value otherwise. 
+func (o *NetConfig) GetOffloadCsum() bool { + if o == nil || o.OffloadCsum == nil { + var ret bool + return ret + } + return *o.OffloadCsum +} + +// GetOffloadCsumOk returns a tuple with the OffloadCsum field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *NetConfig) GetOffloadCsumOk() (*bool, bool) { + if o == nil || o.OffloadCsum == nil { + return nil, false + } + return o.OffloadCsum, true +} + +// HasOffloadCsum returns a boolean if a field has been set. +func (o *NetConfig) HasOffloadCsum() bool { + if o != nil && o.OffloadCsum != nil { + return true + } + + return false +} + +// SetOffloadCsum gets a reference to the given bool and assigns it to the OffloadCsum field. +func (o *NetConfig) SetOffloadCsum(v bool) { + o.OffloadCsum = &v +} + func (o NetConfig) MarshalJSON() ([]byte, error) { toSerialize := map[string]interface{}{} if o.Tap != nil { @@ -607,6 +718,15 @@ func (o NetConfig) MarshalJSON() ([]byte, error) { if o.RateLimiterConfig != nil { toSerialize["rate_limiter_config"] = o.RateLimiterConfig } + if o.OffloadTso != nil { + toSerialize["offload_tso"] = o.OffloadTso + } + if o.OffloadUfo != nil { + toSerialize["offload_ufo"] = o.OffloadUfo + } + if o.OffloadCsum != nil { + toSerialize["offload_csum"] = o.OffloadCsum + } return json.Marshal(toSerialize) } diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_numa_config.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_numa_config.go index f17d75710134..1596cdb29fd2 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_numa_config.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_numa_config.go @@ -21,6 +21,7 @@ type NumaConfig struct { Distances *[]NumaDistance `json:"distances,omitempty"` MemoryZones *[]string `json:"memory_zones,omitempty"` PciSegments *[]int32 `json:"pci_segments,omitempty"` + DeviceId *string `json:"device_id,omitempty"` } // NewNumaConfig instantiates a new 
NumaConfig object @@ -193,6 +194,38 @@ func (o *NumaConfig) SetPciSegments(v []int32) { o.PciSegments = &v } +// GetDeviceId returns the DeviceId field value if set, zero value otherwise. +func (o *NumaConfig) GetDeviceId() string { + if o == nil || o.DeviceId == nil { + var ret string + return ret + } + return *o.DeviceId +} + +// GetDeviceIdOk returns a tuple with the DeviceId field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *NumaConfig) GetDeviceIdOk() (*string, bool) { + if o == nil || o.DeviceId == nil { + return nil, false + } + return o.DeviceId, true +} + +// HasDeviceId returns a boolean if a field has been set. +func (o *NumaConfig) HasDeviceId() bool { + if o != nil && o.DeviceId != nil { + return true + } + + return false +} + +// SetDeviceId gets a reference to the given string and assigns it to the DeviceId field. +func (o *NumaConfig) SetDeviceId(v string) { + o.DeviceId = &v +} + func (o NumaConfig) MarshalJSON() ([]byte, error) { toSerialize := map[string]interface{}{} if true { @@ -210,6 +243,9 @@ func (o NumaConfig) MarshalJSON() ([]byte, error) { if o.PciSegments != nil { toSerialize["pci_segments"] = o.PciSegments } + if o.DeviceId != nil { + toSerialize["device_id"] = o.DeviceId + } return json.Marshal(toSerialize) } diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_vm_resize_disk.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_vm_resize_disk.go new file mode 100644 index 000000000000..2125a48e5957 --- /dev/null +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_vm_resize_disk.go @@ -0,0 +1,151 @@ +/* +Cloud Hypervisor API + +Local HTTP based API for managing and inspecting a cloud-hypervisor virtual machine. + +API version: 0.3.0 +*/ + +// Code generated by OpenAPI Generator (https://openapi-generator.tech); DO NOT EDIT. 
+ +package openapi + +import ( + "encoding/json" +) + +// VmResizeDisk struct for VmResizeDisk +type VmResizeDisk struct { + // disk identifier + Id *string `json:"id,omitempty"` + // desired disk size in bytes + DesiredSize *int64 `json:"desired_size,omitempty"` +} + +// NewVmResizeDisk instantiates a new VmResizeDisk object +// This constructor will assign default values to properties that have it defined, +// and makes sure properties required by API are set, but the set of arguments +// will change when the set of required properties is changed +func NewVmResizeDisk() *VmResizeDisk { + this := VmResizeDisk{} + return &this +} + +// NewVmResizeDiskWithDefaults instantiates a new VmResizeDisk object +// This constructor will only assign default values to properties that have it defined, +// but it doesn't guarantee that properties required by API are set +func NewVmResizeDiskWithDefaults() *VmResizeDisk { + this := VmResizeDisk{} + return &this +} + +// GetId returns the Id field value if set, zero value otherwise. +func (o *VmResizeDisk) GetId() string { + if o == nil || o.Id == nil { + var ret string + return ret + } + return *o.Id +} + +// GetIdOk returns a tuple with the Id field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *VmResizeDisk) GetIdOk() (*string, bool) { + if o == nil || o.Id == nil { + return nil, false + } + return o.Id, true +} + +// HasId returns a boolean if a field has been set. +func (o *VmResizeDisk) HasId() bool { + if o != nil && o.Id != nil { + return true + } + + return false +} + +// SetId gets a reference to the given string and assigns it to the Id field. +func (o *VmResizeDisk) SetId(v string) { + o.Id = &v +} + +// GetDesiredSize returns the DesiredSize field value if set, zero value otherwise. 
+func (o *VmResizeDisk) GetDesiredSize() int64 { + if o == nil || o.DesiredSize == nil { + var ret int64 + return ret + } + return *o.DesiredSize +} + +// GetDesiredSizeOk returns a tuple with the DesiredSize field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *VmResizeDisk) GetDesiredSizeOk() (*int64, bool) { + if o == nil || o.DesiredSize == nil { + return nil, false + } + return o.DesiredSize, true +} + +// HasDesiredSize returns a boolean if a field has been set. +func (o *VmResizeDisk) HasDesiredSize() bool { + if o != nil && o.DesiredSize != nil { + return true + } + + return false +} + +// SetDesiredSize gets a reference to the given int64 and assigns it to the DesiredSize field. +func (o *VmResizeDisk) SetDesiredSize(v int64) { + o.DesiredSize = &v +} + +func (o VmResizeDisk) MarshalJSON() ([]byte, error) { + toSerialize := map[string]interface{}{} + if o.Id != nil { + toSerialize["id"] = o.Id + } + if o.DesiredSize != nil { + toSerialize["desired_size"] = o.DesiredSize + } + return json.Marshal(toSerialize) +} + +type NullableVmResizeDisk struct { + value *VmResizeDisk + isSet bool +} + +func (v NullableVmResizeDisk) Get() *VmResizeDisk { + return v.value +} + +func (v *NullableVmResizeDisk) Set(val *VmResizeDisk) { + v.value = val + v.isSet = true +} + +func (v NullableVmResizeDisk) IsSet() bool { + return v.isSet +} + +func (v *NullableVmResizeDisk) Unset() { + v.value = nil + v.isSet = false +} + +func NewNullableVmResizeDisk(val *VmResizeDisk) *NullableVmResizeDisk { + return &NullableVmResizeDisk{value: val, isSet: true} +} + +func (v NullableVmResizeDisk) MarshalJSON() ([]byte, error) { + return json.Marshal(v.value) +} + +func (v *NullableVmResizeDisk) UnmarshalJSON(src []byte) error { + v.isSet = true + return json.Unmarshal(src, &v.value) +} diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml b/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml index 
e4a76f6b7407..629a6800d112 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml @@ -163,6 +163,22 @@ paths: 429: description: The VM instance could not be resized because a cpu removal is still pending. + /vm.resize-disk: + put: + summary: Resize a disk + requestBody: + description: Resizes a disk attached to the VM + content: + application/json: + schema: + $ref: "#/components/schemas/VmResizeDisk" + required: true + responses: + 204: + description: The disk was successfully resized. + 500: + description: The disk could not be resized. + /vm.resize-zone: put: summary: Resize a memory zone @@ -687,6 +703,9 @@ components: default: false max_phys_bits: type: integer + nested: + type: boolean + default: true affinity: type: array items: @@ -922,6 +941,16 @@ components: type: array items: $ref: "#/components/schemas/VirtQueueAffinity" + backing_files: + type: boolean + default: false + sparse: + type: boolean + default: true + image_type: + type: string + enum: [FixedVhd, Qcow2, Raw, Vhdx, Unknown] + NetConfig: type: object @@ -966,6 +995,15 @@ components: format: int16 rate_limiter_config: $ref: "#/components/schemas/RateLimiterConfig" + offload_tso: + type: boolean + default: true + offload_ufo: + type: boolean + default: true + offload_csum: + type: boolean + default: true RngConfig: required: @@ -1178,6 +1216,8 @@ components: items: type: integer format: int32 + device_id: + type: string VmResize: type: object @@ -1194,6 +1234,17 @@ components: type: integer format: int64 + VmResizeDisk: + type: object + properties: + id: + description: disk identifier + type: string + desired_size: + description: desired disk size in bytes + type: integer + format: int64 + VmResizeZone: type: object properties: diff --git a/versions.yaml b/versions.yaml index 27336c41aec5..68b2836d2f33 100644 --- a/versions.yaml +++ b/versions.yaml @@ -75,7 +75,7 @@ assets: url: 
"https://github.com/cloud-hypervisor/cloud-hypervisor" uscan-url: >- https://github.com/cloud-hypervisor/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz - version: "v48.0" + version: "v51.1" firecracker: description: "Firecracker micro-VMM" From 040791b3ce8d81e4ecd54b117bad9038cd1afbde Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Mon, 23 Feb 2026 19:13:20 +0000 Subject: [PATCH 20/36] runtime: clh: disable nested vCPUs on MSHV The recently-added nested property is true by default, but is not supported yet on MSHV. See cloud-hypervisor/cloud-hypervisor#7408 for additional information. Signed-off-by: Dan Mihai --- src/runtime/virtcontainers/clh.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 5ad4a6bb0e2f..3268492eded8 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -587,6 +587,11 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs()), int32(clh.config.DefaultMaxVCPUs)) + if pathExists("/dev/mshv") { + // The nested property is true by default, but is not supported yet on MSHV. + clh.vmconfig.Cpus.SetNested(false) + } + disableNvdimm := true enableDax := false @@ -1972,3 +1977,10 @@ func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) { func (clh *cloudHypervisor) IsRateLimiterBuiltin() bool { return true } + +func pathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + return false + } + return true +} From 29f1b968b0b2a2895c67deb8388827cacdfdd4bc Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Wed, 18 Feb 2026 17:00:55 +0000 Subject: [PATCH 21/36] static-build: delete cloud-hypervisor directory This cloud-hypervisor is a directory, so it needs "rm -rf" instead of "rm -f". 
Signed-off-by: Dan Mihai --- .../packaging/static-build/cloud-hypervisor/build-static-clh.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh index ba476c734fb4..a7d5f998d726 100755 --- a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh +++ b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh @@ -79,7 +79,7 @@ build_clh_from_source() { else ./scripts/dev_cli.sh build --release --libc "${libc}" fi - rm -f cloud-hypervisor + rm -rf cloud-hypervisor cp build/cargo_target/$(uname -m)-unknown-linux-${libc}/release/cloud-hypervisor . popd } From b8fa5a71e8d0792e4e926fe0a6ac22c14a877980 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Fri, 3 Apr 2026 10:02:01 -0700 Subject: [PATCH 22/36] required-tests: disable kata-deploy k3s test disable Kata Containers CI / kata-containers-ci-on-push / run-kata-deploy-tests / run-kata-deploy-tests (qemu, k3s) Signed-off-by: Saul Paredes --- tools/testing/gatekeeper/required-tests.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/gatekeeper/required-tests.yaml b/tools/testing/gatekeeper/required-tests.yaml index 1a25d1f9b8b4..c04473368bb3 100644 --- a/tools/testing/gatekeeper/required-tests.yaml +++ b/tools/testing/gatekeeper/required-tests.yaml @@ -104,7 +104,8 @@ mapping: # - Kata Containers CI / kata-containers-ci-on-push / run-kata-coco-tests / run-k8s-tests-coco-nontee (qemu-coco-dev, nydus, guest-pull) # - Kata Containers CI / kata-containers-ci-on-push / run-kata-coco-tests / run-k8s-tests-coco-nontee (qemu-coco-dev-runtime-rs, nydus, guest-pull) - Kata Containers CI / kata-containers-ci-on-push / run-kata-deploy-tests / run-kata-deploy-tests (qemu, k0s) - - Kata Containers CI / kata-containers-ci-on-push / run-kata-deploy-tests / run-kata-deploy-tests (qemu, k3s) + # msft-preview: failing: no runtime for 
"kata-my-custom-handler" is configured + # - Kata Containers CI / kata-containers-ci-on-push / run-kata-deploy-tests / run-kata-deploy-tests (qemu, k3s) - Kata Containers CI / kata-containers-ci-on-push / run-kata-deploy-tests / run-kata-deploy-tests (qemu, microk8s) - Kata Containers CI / kata-containers-ci-on-push / run-kata-deploy-tests / run-kata-deploy-tests (qemu, rke2) - Kata Containers CI / kata-containers-ci-on-push / run-kata-monitor-tests / run-monitor (qemu, crio) From 585134bd4963827ff6f409dd758908062f8e16e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Tue, 24 Mar 2026 10:43:20 -0500 Subject: [PATCH 23/36] build: Don't fail `cargo check` on a dirty tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `cargo check` was introduced in 3f1533a to check that Cargo.lock is in sync with Cargo.toml. However, if there are uncommitted changes in the working tree, the current invocation will immediately fail because of the `git diff` call, which is frustrating for local development. As it turns out, `cargo clippy` is a superset of `cargo check`, so we can simply pass `--locked` to `cargo clippy` to detect Cargo.lock issues. This is tested with the following change: diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 96b6c676d..e1963af00 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -4305,6 +4305,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" name = "test-utils" version = "0.1.0" dependencies = [ - "libc", "nix 0.26.4", ] which results in the following output: $ make -C src/agent check make: Entering directory '/kata-containers/src/agent' standard rust check... 
cargo fmt -- --check cargo clippy --all-targets --all-features --release --locked \ -- \ -D warnings error: the lock file /kata-containers/src/agent/Cargo.lock needs to be updated but --locked was passed to prevent this If you want to try to generate the lock file without accessing the network, remove the --locked flag and use --offline instead. make: *** [../../utils.mk:184: standard_rust_check] Error 101 make: Leaving directory '/kata-containers/src/agent' Signed-off-by: Aurélien Bombo Signed-off-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- utils.mk | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/utils.mk b/utils.mk index 590184ead142..16e9431fbd9f 100644 --- a/utils.mk +++ b/utils.mk @@ -181,16 +181,9 @@ CWD := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) standard_rust_check: @echo "standard rust check..." cargo fmt -- --check - cargo clippy --all-targets --all-features --release \ + cargo clippy --all-targets --all-features --release --locked \ -- \ -D warnings - cargo check - @DIFF=$$(git diff HEAD); \ - if [ -n "$$DIFF" ]; then \ - echo "ERROR: cargo check resulted in uncommited changes"; \ - echo "$$DIFF"; \ - exit 1; \ - fi # Install a file (full version). # From a3da27ead755f3ea8397cf1a5ed8d12f234b2e25 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Mon, 27 Apr 2026 14:51:07 -0700 Subject: [PATCH 24/36] Revert "runtime: Enforce that OCI memory limit exceeds 128MB baseline" This reverts commit c06b4704fbb730106341b33ce55ae2198ee300e4. 
--- src/runtime/Makefile | 6 ------ src/runtime/config/configuration-clh.toml.in | 5 ----- src/runtime/pkg/katautils/config.go | 2 -- src/runtime/pkg/oci/utils.go | 7 ------- tools/osbuilder/node-builder/azure-linux/package_build.sh | 4 ++-- 5 files changed, 2 insertions(+), 22 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 094bdb722974..cc006a20db58 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -290,11 +290,6 @@ DEFSTATICRESOURCEMGMT_NV = true DEFSTATICSANDBOXWORKLOADMEM ?= 2048 DEFSTATICSANDBOXWORKLOADVCPUS ?= 1 -# If set, the runtime will enforce that pods deployed in a sandbox -# explicitly setting memory limits using resources.limits.memory -# allow at least this amount of memory in MiB so that the sandbox can properly start. -DEFSANDBOXWORKLOADMEMMIN ?= 128 - DEFDISABLEIMAGENVDIMM ?= false DEFDISABLEIMAGENVDIMM_NV = true DEFDISABLEIMAGENVDIMM_CLH ?= true @@ -790,7 +785,6 @@ USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFSTATICRESOURCEMGMT_NV USER_VARS += DEFSTATICSANDBOXWORKLOADMEM -USER_VARS += DEFSANDBOXWORKLOADMEMMIN USER_VARS += DEFSTATICSANDBOXWORKLOADVCPUS USER_VARS += DEFBINDMOUNTS USER_VARS += DEFCREATECONTAINERTIMEOUT diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 1e5ca9aa5b00..661246a01e3a 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -448,11 +448,6 @@ static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ # default amount of vcpus available within the sandbox. static_sandbox_default_workload_vcpus=@DEFSTATICSANDBOXWORKLOADVCPUS@ -# The runtime will enforce that pods deployed in a sandbox -# explicitly setting memory limits using resources.limits.memory -# allow at least this amount of memory in MiB so that the sandbox can properly start. 
-sandbox_workload_mem_min=@DEFSANDBOXWORKLOADMEMMIN@ - # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 91421f297695..184556305210 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -198,7 +198,6 @@ type runtime struct { StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` StaticSandboxWorkloadDefaultVcpus float32 `toml:"static_sandbox_default_workload_vcpus"` - SandboxWorkloadMemMin uint32 `toml:"sandbox_workload_mem_min"` EnablePprof bool `toml:"enable_pprof"` DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` CreateContainerTimeout uint64 `toml:"create_container_timeout"` @@ -1618,7 +1617,6 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.StaticSandboxWorkloadDefaultMem = tomlConf.Runtime.StaticSandboxWorkloadDefaultMem - config.SandboxWorkloadMemMin = tomlConf.Runtime.SandboxWorkloadMemMin config.StaticSandboxWorkloadDefaultVcpus = tomlConf.Runtime.StaticSandboxWorkloadDefaultVcpus config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index f5e74169aed0..df7f15c5717e 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -159,9 +159,6 @@ type RuntimeConfig struct { // 
vcpus to allocate for workloads within the sandbox when workload vcpus is unspecified StaticSandboxWorkloadDefaultVcpus float32 - // Minimum memory (in MiB) to enforce is allocated for workloads within the sandbox when workload memory is specified - SandboxWorkloadMemMin uint32 - // Determines if create a netns for hypervisor process DisableNewNetNs bool @@ -1265,10 +1262,6 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st } - if sandboxConfig.SandboxResources.WorkloadMemMB < runtime.SandboxWorkloadMemMin { - return vc.SandboxConfig{}, fmt.Errorf("pod memory limit too low: minimum %dMiB, got %dMiB", runtime.SandboxWorkloadMemMin, sandboxConfig.SandboxResources.WorkloadMemMB) - } - return sandboxConfig, nil } diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index 346ba5a9f092..fb93eec197f0 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -29,9 +29,9 @@ runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTC # - for ConfPods we explicitly set the cloud-hypervisor path. 
The path is independent of the PREFIX variable # as we have a single CLH binary for both vanilla Kata and ConfPods if [ "${CONF_PODS}" == "no" ]; then - runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION} DEFSANDBOXWORKLOADMEMMIN=128" + runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}" else - runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION} DEFSANDBOXWORKLOADMEMMIN=192" + runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}" fi # On Mariner 3.0 we use cgroupsv2 with a single sandbox cgroup From 86ce6d3c7ec27abd4d2367bb0ec6f82618e24a96 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Thu, 26 Mar 2026 09:04:04 -0700 Subject: [PATCH 25/36] node-builder: build and install runtime-rs Build and install both runtime-rs and runtime-go configs and binaries side by side: - runtime-go: /usr/local/bin/containerd-shim-kata-v2-go /usr/local/share/defaults/kata-containers/configuration-clh.toml /usr/local/share/defaults/kata-containers/configuration-clh-debug.toml - runtime-rs: /usr/local/bin/containerd-shim-kata-v2-rs /usr/local/share/defaults/kata-containers/configuration-cloud-hypervisor.toml /usr/local/share/defaults/kata-containers/configuration-cloud-hypervisor-debug.toml Also add USE_RUNTIME_RS variable and default to "yes". This controls which runtime binary and configuration will be installed to /usr/local/bin/containerd-shim-kata-v2 and /usr/local/share/defaults/kata-containers/configuration.toml respectively. 
Also install kata-ctl (runtime-rs equivalent of kata-runtime) so we can exec into the UVM when using runtime-rs Signed-off-by: Saul Paredes --- .../node-builder/azure-linux/Makefile | 1 + .../node-builder/azure-linux/common.sh | 19 ++++- .../node-builder/azure-linux/package_build.sh | 79 ++++++++++++++----- .../azure-linux/package_install.sh | 40 ++++++++-- 4 files changed, 112 insertions(+), 27 deletions(-) diff --git a/tools/osbuilder/node-builder/azure-linux/Makefile b/tools/osbuilder/node-builder/azure-linux/Makefile index 85ebf59e2114..30318bc80234 100644 --- a/tools/osbuilder/node-builder/azure-linux/Makefile +++ b/tools/osbuilder/node-builder/azure-linux/Makefile @@ -5,6 +5,7 @@ BUILD_TYPE := release export SHIM_REDEPLOY_CONFIG := yes +export USE_RUNTIME_RS := yes ifeq ($(BUILD_TYPE),debug) export AGENT_BUILD_TYPE := debug diff --git a/tools/osbuilder/node-builder/azure-linux/common.sh b/tools/osbuilder/node-builder/azure-linux/common.sh index 8b0665c47aa2..aa53562dbdc1 100755 --- a/tools/osbuilder/node-builder/azure-linux/common.sh +++ b/tools/osbuilder/node-builder/azure-linux/common.sh @@ -12,6 +12,13 @@ OS_VERSION=$(sort -r /etc/*-release | gawk 'match($0, /^(VERSION_ID=(.*))$/, a) ([[ "${OS_VERSION}" == "2.0" ]] || [[ "${OS_VERSION}" == "3.0" ]]) || die "OS_VERSION: value '${OS_VERSION}' must equal 3.0 (default) or 2.0" +SHIM_CONFIG_FILE_NAME_RUNTIME_GO="configuration-clh.toml" +SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_GO="configuration-clh-debug.toml" +CONFIG_DIR_RUNTIME_GO="src/runtime/config" +SHIM_CONFIG_FILE_NAME_RUNTIME_RS="configuration-cloud-hypervisor.toml" +SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_RS="configuration-cloud-hypervisor-debug.toml" +CONFIG_DIR_RUNTIME_RS="src/runtime-rs/config" + if [ "${CONF_PODS}" == "yes" ]; then INSTALL_PATH_PREFIX="/opt/confidential-containers" UVM_TOOLS_PATH_OSB="${INSTALL_PATH_PREFIX}/uvm/tools/osbuilder" @@ -31,15 +38,23 @@ if [ "${CONF_PODS}" == "yes" ]; then SHIM_BINARIES_PATH="/usr/local/bin" 
SHIM_BINARY_NAME="containerd-shim-kata-cc-v2" else + + # Toggle the default shim implementation installed + if [ "${USE_RUNTIME_RS}" == "yes" ]; then + SHIM_CONFIG_FILE_NAME="${SHIM_CONFIG_FILE_NAME_RUNTIME_RS}" + SHIM_DBG_CONFIG_FILE_NAME="${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_RS}" + else # runtime-go + SHIM_CONFIG_FILE_NAME="${SHIM_CONFIG_FILE_NAME_RUNTIME_GO}" + SHIM_DBG_CONFIG_FILE_NAME="${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_GO}" + fi + INSTALL_PATH_PREFIX="/usr" UVM_TOOLS_PATH_OSB="/opt/kata-containers/uvm/tools/osbuilder" UVM_TOOLS_PATH_SRC="/opt/kata-containers/uvm/src" UVM_PATH_DEFAULT="${INSTALL_PATH_PREFIX}/share/kata-containers" IMG_FILE_NAME="kata-containers.img" SHIM_CONFIG_PATH="${INSTALL_PATH_PREFIX}/share/defaults/kata-containers" - SHIM_CONFIG_FILE_NAME="configuration-clh.toml" SHIM_CONFIG_INST_FILE_NAME="configuration.toml" - SHIM_DBG_CONFIG_FILE_NAME="configuration-clh-debug.toml" SHIM_DBG_CONFIG_INST_FILE_NAME="${SHIM_DBG_CONFIG_FILE_NAME}" DEBUGGING_BINARIES_PATH="${INSTALL_PATH_PREFIX}/local/bin" SHIM_BINARIES_PATH="${INSTALL_PATH_PREFIX}/local/bin" diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index fb93eec197f0..b26d2db127e6 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -19,9 +19,28 @@ repo_dir="${script_dir}/../../../../" common_file="common.sh" source "${common_file}" -# these options ensure we produce the proper CLH config file -runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTCMD= DEFAULT_HYPERVISOR=cloud-hypervisor - DEFMEMSZ=0 DEFSTATICSANDBOXWORKLOADMEM=512 DEFVCPUS=0 DEFSTATICSANDBOXWORKLOADVCPUS=1 DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} PREFIX=${INSTALL_PATH_PREFIX}" +runtime_go_make_flags="SKIP_GO_VERSION_CHECK=1 \ + QEMUCMD= \ + FCCMD= \ + ACRNCMD= \ + STRATOVIRTCMD= \ + DEFAULT_HYPERVISOR=cloud-hypervisor \ + DEFMEMSZ=0 \ + 
DEFSTATICSANDBOXWORKLOADMEM=512 \ + DEFVCPUS=0 \ + DEFSTATICSANDBOXWORKLOADVCPUS=1 \ + DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} \ + PREFIX=${INSTALL_PATH_PREFIX}" + +runtime_rs_make_flags="BUILD_TYPE=release \ + LIBC=gnu \ + HYPERVISOR=cloud-hypervisor \ + OPENSSL_NO_VENDOR=Y \ + USE_BUILDIN_DB=false \ + QEMUCMD= \ + FCCMD= \ + DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} \ + PREFIX=${INSTALL_PATH_PREFIX}" # - for vanilla Kata we use the kernel binary. For ConfPods we use IGVM, so no need to provide kernel path. # - for vanilla Kata we explicitly set DEFSTATICRESOURCEMGMT_CLH. For ConfPods, @@ -29,14 +48,17 @@ runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTC # - for ConfPods we explicitly set the cloud-hypervisor path. The path is independent of the PREFIX variable # as we have a single CLH binary for both vanilla Kata and ConfPods if [ "${CONF_PODS}" == "no" ]; then - runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}" + runtime_go_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}" + runtime_rs_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}" else - runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}" + runtime_go_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}" + runtime_rs_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}" fi # On Mariner 3.0 we use cgroupsv2 with a single sandbox cgroup if [ "${OS_VERSION}" == "3.0" ]; then - runtime_make_flags+=" DEFSANDBOXCGROUPONLY=true" + runtime_go_make_flags+=" DEFSANDBOXCGROUPONLY=true" + runtime_rs_make_flags+=" DEFSANDBOXCGROUPONLY_CLH=true" fi agent_make_flags="LIBC=gnu OPENSSL_NO_VENDOR=Y DESTDIR=${AGENT_INSTALL_DIR} BUILD_TYPE=${AGENT_BUILD_TYPE}" @@ -65,29 +87,50 @@ if [ "${CONF_PODS}" == "yes" ]; then popd fi -echo "Building shim binary and configuration" +echo "Building runtime-go shim binary" pushd src/runtime/ if [ "${CONF_PODS}" == "yes" ] || [ 
"${OS_VERSION}" == "3.0" ]; then - make ${runtime_make_flags} + make ${runtime_go_make_flags} else # Mariner 2 pod sandboxing uses cgroupsv1 - note: cannot add the kernelparams in above assignments, # leads to quotation issue. Hence, implementing the conditional check right here at the time of the make command - make ${runtime_make_flags} KERNELPARAMS="systemd.legacy_systemd_cgroup_controller=yes systemd.unified_cgroup_hierarchy=0" + make ${runtime_go_make_flags} KERNELPARAMS="systemd.legacy_systemd_cgroup_controller=yes systemd.unified_cgroup_hierarchy=0" fi popd -pushd src/runtime/config/ -echo "Creating shim debug configuration" -cp "${SHIM_CONFIG_FILE_NAME}" "${SHIM_DBG_CONFIG_FILE_NAME}" -sed -i '/^#enable_debug =/s|^#||g' "${SHIM_DBG_CONFIG_FILE_NAME}" -sed -i '/^#debug_console_enabled =/s|^#||g' "${SHIM_DBG_CONFIG_FILE_NAME}" +echo "Building runtime-rs shim binary" +pushd src/runtime-rs/ +make ${runtime_rs_make_flags} +popd -if [ "${CONF_PODS}" == "yes" ]; then - echo "Adding debug igvm to SNP shim debug configuration" - sed -i "s|${IGVM_FILE_NAME}|${IGVM_DBG_FILE_NAME}|g" "${SHIM_DBG_CONFIG_FILE_NAME}" -fi +echo "Building kata-ctl binary" +pushd src/tools/kata-ctl/ +make ${runtime_rs_make_flags} popd +create_debug_shim_config() { + local config_dir="$1" + local release_cfg="$2" + local debug_cfg="$3" + + pushd "${config_dir}" + echo "Creating shim debug configuration: ${debug_cfg}" + cp "${release_cfg}" "${debug_cfg}" + # Ensure debug is enabled in the shim config, regardless of whether the + # template uses commented or uncommented keys. 
+ sed -i -E 's|^#?[[:space:]]*enable_debug[[:space:]]*=.*$|enable_debug = true|' "${debug_cfg}" + sed -i -E 's|^#?[[:space:]]*debug_console_enabled[[:space:]]*=.*$|debug_console_enabled = true|' "${debug_cfg}" + + if [ "${CONF_PODS}" == "yes" ]; then + echo "Adding debug igvm to SNP shim debug configuration" + sed -i "s|${IGVM_FILE_NAME}|${IGVM_DBG_FILE_NAME}|g" "${debug_cfg}" + fi + popd +} + +create_debug_shim_config "${CONFIG_DIR_RUNTIME_GO}" "${SHIM_CONFIG_FILE_NAME_RUNTIME_GO}" "${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_GO}" +create_debug_shim_config "${CONFIG_DIR_RUNTIME_RS}" "${SHIM_CONFIG_FILE_NAME_RUNTIME_RS}" "${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_RS}" + echo "Building agent binary and generating service files" pushd src/agent/ make ${agent_make_flags} diff --git a/tools/osbuilder/node-builder/azure-linux/package_install.sh b/tools/osbuilder/node-builder/azure-linux/package_install.sh index 791cff5d92d2..e61fcf296825 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_install.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_install.sh @@ -29,6 +29,13 @@ mkdir -p "${PREFIX}/${SHIM_CONFIG_PATH}" mkdir -p "${PREFIX}/${DEBUGGING_BINARIES_PATH}" mkdir -p "${PREFIX}/${SHIM_BINARIES_PATH}" +RUNTIME_GO_SHIM="src/runtime/containerd-shim-kata-v2" +RUNTIME_RS_TARGET="target/x86_64-unknown-linux-gnu/release" +RUNTIME_RS_SHIM="${RUNTIME_RS_TARGET}/containerd-shim-kata-v2" +KATA_CTL_BINARY="src/tools/kata-ctl/target/x86_64-unknown-linux-gnu/release/kata-ctl" +SHIM_BINARY_RUNTIME_GO="${SHIM_BINARY_NAME}-go" +SHIM_BINARY_RUNTIME_RS="${SHIM_BINARY_NAME}-rs" + if [ "${CONF_PODS}" == "yes" ]; then echo "Installing tardev-snapshotter binaries and service file" mkdir -p ${PREFIX}/usr/sbin @@ -50,21 +57,40 @@ cp -a --backup=numbered src/runtime/kata-monitor "${PREFIX}/${DEBUGGING_BINARIES cp -a --backup=numbered src/runtime/kata-runtime "${PREFIX}/${DEBUGGING_BINARIES_PATH}" chmod +x src/runtime/data/kata-collect-data.sh cp -a --backup=numbered 
src/runtime/data/kata-collect-data.sh "${PREFIX}/${DEBUGGING_BINARIES_PATH}" +cp -a --backup=numbered "${KATA_CTL_BINARY}" "${PREFIX}/${DEBUGGING_BINARIES_PATH}" + +echo "Installing shim binaries side by side" +cp -a --backup=numbered "${RUNTIME_GO_SHIM}" "${PREFIX}/${SHIM_BINARIES_PATH}/${SHIM_BINARY_RUNTIME_GO}" +cp -a --backup=numbered "${RUNTIME_RS_SHIM}" "${PREFIX}/${SHIM_BINARIES_PATH}/${SHIM_BINARY_RUNTIME_RS}" -echo "Installing shim binary" -cp -a --backup=numbered src/runtime/containerd-shim-kata-v2 "${PREFIX}/${SHIM_BINARIES_PATH}"/"${SHIM_BINARY_NAME}" +default_shim_binary="${SHIM_BINARY_RUNTIME_GO}" +shim_config_src_dir="${CONFIG_DIR_RUNTIME_GO}" + +if [ "${USE_RUNTIME_RS}" == "yes" ]; then + default_shim_binary="${SHIM_BINARY_RUNTIME_RS}" + shim_config_src_dir="${CONFIG_DIR_RUNTIME_RS}" +fi + +echo "Installing default shim binary: ${default_shim_binary}" +ln -sf --backup=numbered "${default_shim_binary}" "${PREFIX}/${SHIM_BINARIES_PATH}/${SHIM_BINARY_NAME}" if [ "${SHIM_REDEPLOY_CONFIG}" == "yes" ]; then - echo "Installing shim configuration" - cp -a --backup=numbered src/runtime/config/"${SHIM_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_INST_FILE_NAME}" - cp -a --backup=numbered src/runtime/config/"${SHIM_DBG_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_DBG_CONFIG_INST_FILE_NAME}" + + echo "Installing configurations side by side" + cp -a --backup=numbered "${CONFIG_DIR_RUNTIME_GO}/${SHIM_CONFIG_FILE_NAME_RUNTIME_GO}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_FILE_NAME_RUNTIME_GO}" + cp -a --backup=numbered "${CONFIG_DIR_RUNTIME_RS}/${SHIM_CONFIG_FILE_NAME_RUNTIME_RS}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_FILE_NAME_RUNTIME_RS}" + cp -a --backup=numbered "${CONFIG_DIR_RUNTIME_GO}/${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_GO}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_GO}" + cp -a --backup=numbered "${CONFIG_DIR_RUNTIME_RS}/${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_RS}" 
"${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_DBG_CONFIG_FILE_NAME_RUNTIME_RS}" + + echo "Installing default shim configuration: ${SHIM_CONFIG_FILE_NAME}" + cp -a --backup=numbered "${shim_config_src_dir}/${SHIM_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_INST_FILE_NAME}" if [ "${SHIM_USE_DEBUG_CONFIG}" == "yes" ]; then # We simply override the release config with the debug config, # which is probably fine when debugging. Not symlinking as that # would create cycles the next time this script is called. - echo "Overriding shim configuration with debug configuration" - cp -a --backup=numbered src/runtime/config/"${SHIM_DBG_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_INST_FILE_NAME}" + echo "Overriding shim configuration with debug configuration: ${SHIM_DBG_CONFIG_FILE_NAME}" + cp -a --backup=numbered "${shim_config_src_dir}/${SHIM_DBG_CONFIG_FILE_NAME}" "${PREFIX}/${SHIM_CONFIG_PATH}/${SHIM_CONFIG_INST_FILE_NAME}" fi else echo "Skipping installation of shim configuration" From 045090af5b246f6be55ba952f5fe31d149658f69 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Mon, 30 Mar 2026 08:47:58 -0700 Subject: [PATCH 26/36] runtime-rs: Resolve high UVM memory footprint This is a port from https://github.com/microsoft/kata-containers/commit/b03db3e3c0e2708e009aa108500601f533ae7b20 into runtime-rs Rationale: This is a temporary solution for optimizing memory usage for the current mechanism of requesting resources through pod Limit annotations: - if no Limits are specified and hence WorkloadMemMB is 0, set a default value 'StaticWorkloadDefaultMem' to allocate a default amount of memory for use for containers in the sandbox in addition to the base memory - if Limits are specified, the base memory and the sum of Limits are allocated. 
The end user needs to be aware of the minimum memory requirements for their pods, otherwise the pod will be stuck in the ContainerCreating state Testing: Manual testing, creating pods with Limits and without limits, and with two containers where each container has a limit, tested with integration in a SPEC file where the config variables were set via environment variables via the make command Signed-off-by: Saul Paredes --- src/libs/kata-types/src/config/runtime.rs | 4 ++++ src/runtime-rs/Makefile | 5 ++++- .../config/configuration-cloud-hypervisor.toml.in | 5 +++++ src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs | 7 +++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/libs/kata-types/src/config/runtime.rs b/src/libs/kata-types/src/config/runtime.rs index 077c6b96db1a..6c6752efe202 100644 --- a/src/libs/kata-types/src/config/runtime.rs +++ b/src/libs/kata-types/src/config/runtime.rs @@ -123,6 +123,10 @@ pub struct Runtime { #[serde(default)] pub static_sandbox_resource_mgmt: bool, + /// Memory to allocate for workloads within the sandbox when workload memory is unspecified + #[serde(default)] + pub static_sandbox_default_workload_mem: u32, + /// Determines whether container seccomp profiles are passed to the virtual machine and /// applied by the kata agent. If set to true, seccomp is not applied within the guest. 
#[serde(default)] diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index eea49c54f9ab..9b025640973c 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -163,7 +163,7 @@ DEFVCPUS := 1 ##VAR DEFMAXVCPUS= Default maximum number of vCPUs DEFMAXVCPUS := 0 ##VAR DEFMEMSZ= Default memory size in MiB -DEFMEMSZ := 2048 +DEFMEMSZ ?= 2048 ##VAR DEFMEMSLOTS= Default memory slots # Cases to consider : # - nvdimm rootfs image @@ -214,6 +214,8 @@ DEFVFIOMODE := guest-kernel DEFBINDMOUNTS := [] DEFDANCONF := /run/kata-containers/dans DEFFORCEGUESTPULL := false +# Default memory for workloads within the sandbox when no workload memory is requested. +DEFSTATICSANDBOXWORKLOADMEM ?= 2048 QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT := 4050 # Create Container Timeout in seconds @@ -622,6 +624,7 @@ USER_VARS += KATA_INSTALL_OWNER USER_VARS += KATA_INSTALL_CFG_PERMS USER_VARS += DEFDANCONF USER_VARS += DEFFORCEGUESTPULL +USER_VARS += DEFSTATICSANDBOXWORKLOADMEM USER_VARS += QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT USER_VARS += DEFCREATECONTAINERTIMEOUT USER_VARS += DEFCREATECONTAINERTIMEOUT_COCO diff --git a/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in b/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in index 4b981d47c099..fb4ad0662462 100644 --- a/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in +++ b/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in @@ -522,6 +522,11 @@ enable_pprof = false # - When running single containers using a tool like ctr, container sizing information will be available. static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH@ +# If set, the runtime will use the value as the default workload memory in MiB for the sandbox when no workload memory request is passed +# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper +# default amount of memory available within the sandbox. 
+static_sandbox_default_workload_mem = @DEFSTATICSANDBOXWORKLOADMEM@ + # If specified, sandbox_bind_mounts identifieds host paths to be mounted(ro, rw) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs index 9b56af9734fa..87da0398a482 100644 --- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs +++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs @@ -140,9 +140,16 @@ impl InitialSizeManager { .get_mut(hypervisor_name) .context("failed to get hypervisor config")?; + if config.runtime.static_sandbox_resource_mgmt { + if self.resource.mem_mb == 0 { + self.resource.mem_mb = config.runtime.static_sandbox_default_workload_mem; + } + } + if self.resource.vcpu > 0.0 { info!(sl!(), "resource with vcpu {}", self.resource.vcpu); } + self.resource.orig_toml_default_mem = hv.memory_info.default_memory; if self.resource.mem_mb > 0 { // since the memory overhead introduced by kata-agent and system components From 819f0db6d77e59ef9f1035c5eece080cc16241ac Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Mon, 30 Mar 2026 08:52:47 -0700 Subject: [PATCH 27/36] runtime-rs: improved memory overhead management This is a port from https://github.com/microsoft/kata-containers/commit/7ddec33642c9a307d4af79078c2f15be887bec86 into runtime-rs After these changes: 1. The value of the K8s runtime class memory overhead: - Covers the memory usage from all the Host-side components (mainly the Kata Shim and the VMM). - Doesn't include the memory usage from any Guest-side components. 2. The value of a pod memory limit specified by the user: - Is equal to the memory size of the Pod VM. 
- Includes the memory usage from all the Guest-side components (mainly user's workload, the Guest kernel, and the Kata Agent) - Doesn't include the memory usage from any Host-side components. Signed-off-by: Saul Paredes --- src/libs/kata-types/src/config/default.rs | 2 +- src/libs/kata-types/src/config/hypervisor/ch.rs | 9 --------- src/libs/kata-types/src/config/hypervisor/mod.rs | 9 ++------- .../crates/runtimes/virt_container/src/factory/vm.rs | 5 ----- .../osbuilder/node-builder/azure-linux/package_build.sh | 4 +++- 5 files changed, 6 insertions(+), 23 deletions(-) diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs index 7a4e42b80eb3..2839de928d5a 100644 --- a/src/libs/kata-types/src/config/default.rs +++ b/src/libs/kata-types/src/config/default.rs @@ -93,7 +93,7 @@ pub const DEFAULT_CH_MEMORY_SLOTS: u32 = 128; pub const DEFAULT_CH_PCI_BRIDGES: u32 = 2; pub const MAX_CH_PCI_BRIDGES: u32 = 5; pub const MAX_CH_VCPUS: u32 = 256; -pub const MIN_CH_MEMORY_SIZE_MB: u32 = 64; +pub const MIN_CH_MEMORY_SIZE_MB: u32 = 0; //Default configuration for firecracker pub const DEFAULT_FIRECRACKER_ENTROPY_SOURCE: &str = "/dev/urandom"; diff --git a/src/libs/kata-types/src/config/hypervisor/ch.rs b/src/libs/kata-types/src/config/hypervisor/ch.rs index 2d017ae211ce..55bc29acc13b 100644 --- a/src/libs/kata-types/src/config/hypervisor/ch.rs +++ b/src/libs/kata-types/src/config/hypervisor/ch.rs @@ -79,9 +79,6 @@ impl ConfigPlugin for CloudHypervisorConfig { ch.machine_info.entropy_source = default::DEFAULT_CH_ENTROPY_SOURCE.to_string(); } - if ch.memory_info.default_memory == 0 { - ch.memory_info.default_memory = default::DEFAULT_CH_MEMORY_SIZE_MB; - } if ch.memory_info.memory_slots == 0 { ch.memory_info.memory_slots = default::DEFAULT_CH_MEMORY_SLOTS; } @@ -129,12 +126,6 @@ impl ConfigPlugin for CloudHypervisorConfig { ch.device_info.default_bridges, ))); } - - if ch.memory_info.default_memory < MIN_CH_MEMORY_SIZE_MB { - return 
Err(std::io::Error::other(format!( - "CH hypervisor has minimal memory limitation {MIN_CH_MEMORY_SIZE_MB}", - ))); - } } Ok(()) diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index 20be1909b2b8..894d825bc6e2 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -1031,19 +1031,14 @@ impl MemoryInfo { /// Validates the memory configuration information. /// - /// This ensures that critical memory parameters like `default_memory` - /// and `memory_slots` are non-zero, and checks the validity of + /// This ensures that critical memory parameters like `memory_slots` are + /// non-zero, and checks the validity of /// the memory backend file path. pub fn validate(&self) -> Result<()> { validate_path!( self.file_mem_backend, "Memory backend file {} is invalid: {}" )?; - if self.default_memory == 0 { - return Err(std::io::Error::other( - "Configured memory size for guest VM is zero", - )); - } if self.memory_slots == 0 { return Err(std::io::Error::other( "Configured memory slots for guest VM are zero", diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/factory/vm.rs b/src/runtime-rs/crates/runtimes/virt_container/src/factory/vm.rs index 9be1e68aed0e..58dfddd45008 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/factory/vm.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/factory/vm.rs @@ -137,11 +137,6 @@ impl VmConfig { conf.cpu_info.default_vcpus = default::DEFAULT_GUEST_VCPUS as f32; } - // memory_size - if conf.memory_info.default_memory == 0 { - conf.memory_info.default_memory = default::DEFAULT_QEMU_MEMORY_SIZE_MB; - } - // default_bridges if conf.device_info.default_bridges == 0 { conf.device_info.default_bridges = default::DEFAULT_QEMU_PCI_BRIDGES; diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index 
b26d2db127e6..69300a3b038d 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -40,7 +40,9 @@ runtime_rs_make_flags="BUILD_TYPE=release \ QEMUCMD= \ FCCMD= \ DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} \ - PREFIX=${INSTALL_PATH_PREFIX}" + PREFIX=${INSTALL_PATH_PREFIX}" \ + DEFMEMSZ=0 \ + DEFSTATICSANDBOXWORKLOADMEM=512 # - for vanilla Kata we use the kernel binary. For ConfPods we use IGVM, so no need to provide kernel path. # - for vanilla Kata we explicitly set DEFSTATICRESOURCEMGMT_CLH. For ConfPods, From 7587d1eaa7cd601d2983e64435086b9798ba127a Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Mon, 30 Mar 2026 08:54:05 -0700 Subject: [PATCH 28/36] runtime-rs: Allocate default workload vcpus This is a port from https://github.com/microsoft/kata-containers/commit/9af9844bc7331d0a37d0159f299ab3fb0e443669 Plus ports an existing behaviour from runtime-go to also add the vcpus. See https://github.com/fidencio/kata-containers/blob/e2476f587c472d5d217df9c75cdb80193dd85994/src/runtime/pkg/oci/utils.go#L1232 - similar to the static_sandbox_default_workload_mem option, assign a default number of vcpus to the VM when no limits are given, 1 vcpu in this case - similar to commit c7b8ee9, do not allocate additional vcpus when limits are provided Signed-off-by: Saul Paredes --- src/libs/kata-types/src/config/default.rs | 2 +- src/libs/kata-types/src/config/runtime.rs | 5 +++++ src/runtime-rs/Makefile | 6 ++++-- .../config/configuration-cloud-hypervisor.toml.in | 5 +++++ src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs | 4 ++++ tools/osbuilder/node-builder/azure-linux/package_build.sh | 6 ++++-- 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs index 2839de928d5a..8cbe988a2d49 100644 --- a/src/libs/kata-types/src/config/default.rs +++ b/src/libs/kata-types/src/config/default.rs @@ 
-54,7 +54,7 @@ pub const MAX_SHARED_9PFS_SIZE_MB: u32 = 8 * 1024 * 1024; pub const DEFAULT_GUEST_HOOK_PATH: &str = "/opt/kata/hooks"; pub const DEFAULT_GUEST_DNS_FILE: &str = "/etc/resolv.conf"; -pub const DEFAULT_GUEST_VCPUS: u32 = 1; +pub const DEFAULT_GUEST_VCPUS: u32 = 0; // Default configuration for dragonball pub const DEFAULT_DRAGONBALL_GUEST_KERNEL_IMAGE: &str = "vmlinuz"; diff --git a/src/libs/kata-types/src/config/runtime.rs b/src/libs/kata-types/src/config/runtime.rs index 6c6752efe202..1d91010ef4a6 100644 --- a/src/libs/kata-types/src/config/runtime.rs +++ b/src/libs/kata-types/src/config/runtime.rs @@ -127,6 +127,11 @@ pub struct Runtime { #[serde(default)] pub static_sandbox_default_workload_mem: u32, + /// Default workload vcpus added to the sandbox when static resource management + /// is enabled and no explicit workload vcpu limit was provided. + #[serde(default)] + pub static_sandbox_default_workload_vcpus: f32, + /// Determines whether container seccomp profiles are passed to the virtual machine and /// applied by the kata agent. If set to true, seccomp is not applied within the guest. #[serde(default)] diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 9b025640973c..1de2febcecf5 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -159,7 +159,7 @@ FIRMWARE_SNP_PATH := $(PREFIXDEPS)/share/ovmf/AMDSEV.fd FIRMWARE_VOLUME_SNP_PATH := ##VAR DEFVCPUS= Default number of vCPUs -DEFVCPUS := 1 +DEFVCPUS ?= 1 ##VAR DEFMAXVCPUS= Default maximum number of vCPUs DEFMAXVCPUS := 0 ##VAR DEFMEMSZ= Default memory size in MiB @@ -214,8 +214,9 @@ DEFVFIOMODE := guest-kernel DEFBINDMOUNTS := [] DEFDANCONF := /run/kata-containers/dans DEFFORCEGUESTPULL := false -# Default memory for workloads within the sandbox when no workload memory is requested. +# Default memory and vcpus for workloads within the sandbox when no workload values are requested. 
DEFSTATICSANDBOXWORKLOADMEM ?= 2048 +DEFSTATICSANDBOXWORKLOADVCPUS ?= 1 QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT := 4050 # Create Container Timeout in seconds @@ -625,6 +626,7 @@ USER_VARS += KATA_INSTALL_CFG_PERMS USER_VARS += DEFDANCONF USER_VARS += DEFFORCEGUESTPULL USER_VARS += DEFSTATICSANDBOXWORKLOADMEM +USER_VARS += DEFSTATICSANDBOXWORKLOADVCPUS USER_VARS += QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT USER_VARS += DEFCREATECONTAINERTIMEOUT USER_VARS += DEFCREATECONTAINERTIMEOUT_COCO diff --git a/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in b/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in index fb4ad0662462..7ab232f882af 100644 --- a/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in +++ b/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in @@ -527,6 +527,11 @@ static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH@ # default amount of memory available within the sandbox. static_sandbox_default_workload_mem = @DEFSTATICSANDBOXWORKLOADMEM@ +# If set, the runtime will use the value as the default number of vcpus for the sandbox when no workload vcpu request is passed +# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper +# default amount of vcpus available within the sandbox. +static_sandbox_default_workload_vcpus = @DEFSTATICSANDBOXWORKLOADVCPUS@ + # If specified, sandbox_bind_mounts identifieds host paths to be mounted(ro, rw) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. 
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs index 87da0398a482..c903c72617a8 100644 --- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs +++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs @@ -144,6 +144,10 @@ impl InitialSizeManager { if self.resource.mem_mb == 0 { self.resource.mem_mb = config.runtime.static_sandbox_default_workload_mem; } + + if self.resource.vcpu == 0.0 { + self.resource.vcpu = config.runtime.static_sandbox_default_workload_vcpus; + } } if self.resource.vcpu > 0.0 { diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index 69300a3b038d..90760778eef4 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -40,9 +40,11 @@ runtime_rs_make_flags="BUILD_TYPE=release \ QEMUCMD= \ FCCMD= \ DEFVIRTIOFSDAEMON=${VIRTIOFSD_BINARY_LOCATION} \ - PREFIX=${INSTALL_PATH_PREFIX}" \ + PREFIX=${INSTALL_PATH_PREFIX} \ DEFMEMSZ=0 \ - DEFSTATICSANDBOXWORKLOADMEM=512 + DEFSTATICSANDBOXWORKLOADMEM=512 \ + DEFVCPUS=0 \ + DEFSTATICSANDBOXWORKLOADVCPUS=1" # - for vanilla Kata we use the kernel binary. For ConfPods we use IGVM, so no need to provide kernel path. # - for vanilla Kata we explicitly set DEFSTATICRESOURCEMGMT_CLH. For ConfPods, From 1aa453a3a35661bf116eeadf832c27f43a810b59 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Thu, 23 Apr 2026 11:21:05 -0700 Subject: [PATCH 29/36] runtime-rs: ch: disable nested vCPUs on MSHV This is a runtime-rs port for https://github.com/kata-containers/kata-containers/commit/7973e4e2a88fcda9fd137d84b6a7d6ac56e5739e The recently-added nested property is true by default, but is not supported yet on MSHV. 
See https://github.com/cloud-hypervisor/cloud-hypervisor/pull/7408 for additional information. Signed-off-by: Saul Paredes --- .../crates/hypervisor/ch-config/src/convert.rs | 18 +++++++++++++++++- .../crates/hypervisor/ch-config/src/lib.rs | 3 +++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs index cd6998a649f0..46bc9dda4c61 100644 --- a/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs @@ -18,7 +18,7 @@ use kata_types::config::hypervisor::{ }; use kata_types::config::BootInfo; use std::convert::TryFrom; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use crate::errors::*; @@ -36,6 +36,17 @@ pub const DEFAULT_NUM_PCI_SEGMENTS: u16 = 1; pub const DEFAULT_DISK_QUEUES: usize = 1; pub const DEFAULT_DISK_QUEUE_SIZE: u16 = 128; +const MSHV_DEVICE_PATH: &str = "/dev/mshv"; + +fn cpu_nested_config() -> Option<bool> { + if Path::new(MSHV_DEVICE_PATH).exists() { + // Nested vCPUs are not supported on MSHV yet. + Some(false) + } else { + None + } +} + // TDX requires all rootfs's be mounted using a block device.
This test // ensures that the user has a correct set of values for the following Kata // Containers configuration "hypervisor" section variables: @@ -351,6 +362,7 @@ impl TryFrom<(CpuInfo, GuestProtection)> for CpusConfig { let cfg = CpusConfig { boot_vcpus, max_vcpus, + nested: cpu_nested_config(), max_phys_bits, topology: Some(topology), features, @@ -615,6 +627,7 @@ mod tests { let cpus_config = CpusConfig { boot_vcpus: cpu_default, max_vcpus, + nested: cpu_nested_config(), topology: Some(CpuTopology { cores_per_die: max_vcpus, @@ -1185,6 +1198,7 @@ mod tests { result: Ok(CpusConfig { boot_vcpus: 1, max_vcpus: 1, + nested: cpu_nested_config(), topology: Some(CpuTopology { cores_per_die: 1, @@ -1205,6 +1219,7 @@ mod tests { result: Ok(CpusConfig { boot_vcpus: 1, max_vcpus: 3, + nested: cpu_nested_config(), topology: Some(CpuTopology { cores_per_die: 3, @@ -1225,6 +1240,7 @@ mod tests { result: Ok(CpusConfig { boot_vcpus: 1, max_vcpus: 1, + nested: cpu_nested_config(), topology: Some(CpuTopology { cores_per_die: 1, diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs index b2039f6331d5..deb0978f040d 100644 --- a/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs @@ -75,6 +75,9 @@ pub struct CpusConfig { pub topology: Option<CpuTopology>, #[serde(default)] pub kvm_hyperv: bool, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub nested: Option<bool>, #[serde(skip_serializing_if = "u8_is_zero")] pub max_phys_bits: u8, #[serde(default)] From 7dd05eef956b693ec3d1a232238211cc4bb53e4b Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Mon, 13 Apr 2026 14:33:33 -0700 Subject: [PATCH 30/36] runtime-rs: add build optimization flags upstream cherry-pick of 9404104aba4c5a33cb0598492f65f74b15f6faa9 Enable the following optimizations when building runtime-rs in release mode: - lto: true - codegen-units=1: These settings reduce the binary
size and improve performance at the cost of longer build times. Without these flags: - build time: 4m 55s - binary size: 51 MB With these flags: - build time: 7m 21s - binary size: 38MB Per https://github.com/kata-containers/kata-containers/issues/1125 and local experiments, a smaller binary size leads to a smaller shim memory footprint. - https://nnethercote.github.io/perf-book/build-configuration.html#codegen-units - https://nnethercote.github.io/perf-book/build-configuration.html#link-time-optimization Signed-off-by: Saul Paredes --- src/runtime-rs/Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 1de2febcecf5..2ba05e1d4a52 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -17,6 +17,13 @@ CONTAINERD_RUNTIME_NAME = io.containerd.kata.v2 include ../../utils.mk +##VAR RELEASE_LTO=true|false|thin|fat LTO setting used for release builds +RELEASE_LTO ?= true +##VAR RELEASE_CODEGEN_UNITS= codegen units used for release builds +RELEASE_CODEGEN_UNITS ?= 1 +# Apply release profile overrides only for release builds. 
+RELEASE_CARGO_PROFILE_ENV = $(if $(findstring release,$(BUILD_TYPE)),CARGO_PROFILE_RELEASE_LTO=$(RELEASE_LTO) CARGO_PROFILE_RELEASE_CODEGEN_UNITS=$(RELEASE_CODEGEN_UNITS),) + ARCH_DIR = arch ARCH_FILE_SUFFIX = -options.mk ARCH_FILE = $(ARCH_DIR)/$(ARCH)$(ARCH_FILE_SUFFIX) @@ -738,7 +745,7 @@ static-checks-build: $(GENERATED_FILES) $(TARGET): $(GENERATED_FILES) $(TARGET_PATH) $(TARGET_PATH): $(SOURCES) | show-summary - @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES) + @$(RELEASE_CARGO_PROFILE_ENV) RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES) $(GENERATED_FILES): %: %.in @sed \ @@ -748,7 +755,7 @@ $(GENERATED_FILES): %: %.in ##TARGET optimize: optimized build optimize: $(SOURCES) | show-summary show-header - @RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES) + @$(RELEASE_CARGO_PROFILE_ENV) RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES) ##TARGET clean: clean build clean: clean-generated-files From f33ca7e2e9328c739f65dc01731b62f3023f906a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 7 Apr 2026 15:30:15 +0200 Subject: [PATCH 31/36] runtime-rs: Fix initial vCPU / memory with static_sandbox_resource_mgmt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit InitialSizeManager::setup_config() is responsible for applying the sandbox workload sizing (computed from containerd/CRI-O sandbox annotations) to the hypervisor configuration before VM creation. Previously, the workload vCPU count was only logged but never actually added to default_vcpus, so the VM was always created with only the base vCPUs from the configuration/annotations. 
This caused the k8s-sandbox-vcpus-allocation test to fail with qemu-snp-runtime-rs: a pod with default_vcpus=0.75 and a container CPU limit of 1.2 should see ceil(0.75 + 1.2) = 2 vCPUs, but only got 1. Additionally, the workload memory was being added to default_memory unconditionally, diverging from the Go runtime which only applies both CPU and memory additions when static_sandbox_resource_mgmt is enabled. In the non-static path, adding workload resources here would cause double-counting: once from setup_config() at sandbox creation, and again from update_cpu_resources()/update_mem_resources() when individual containers are added. Guard both additions behind static_sandbox_resource_mgmt, matching the Go runtime's behavior in src/runtime/pkg/oci/utils.go: if sandboxConfig.StaticResourceMgmt { sandboxConfig.HypervisorConfig.NumVCPUsF += sandboxConfig.SandboxResources.WorkloadCPUs sandboxConfig.HypervisorConfig.MemorySize += sandboxConfig.SandboxResources.WorkloadMemMB } Fixes: k8s-sandbox-vcpus-allocation test failure on qemu-snp-runtime-rs Signed-off-by: Fabiano Fidêncio Made-with: Cursor --- .../resource/src/cpu_mem/initial_size.rs | 127 +++++++++++++++++- 1 file changed, 121 insertions(+), 6 deletions(-) diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs index c903c72617a8..927dbdf4213c 100644 --- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs +++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs @@ -152,16 +152,24 @@ impl InitialSizeManager { if self.resource.vcpu > 0.0 { info!(sl!(), "resource with vcpu {}", self.resource.vcpu); + if config.runtime.static_sandbox_resource_mgmt { + hv.cpu_info.default_vcpus += self.resource.vcpu; + let new_vcpus_ceil = hv.cpu_info.default_vcpus.ceil() as u32; + if hv.cpu_info.default_maxvcpus < new_vcpus_ceil { + hv.cpu_info.default_maxvcpus = new_vcpus_ceil; + } + } } self.resource.orig_toml_default_mem = 
hv.memory_info.default_memory; if self.resource.mem_mb > 0 { - // since the memory overhead introduced by kata-agent and system components - // will really affect the amount of memory the user can use, so we choose to - // plus the default_memory here, instead of overriding it. - // (if we override the default_memory here, and user apllications still - // use memory as they orignally expected, it would be easy to OOM.) - hv.memory_info.default_memory += self.resource.mem_mb; + info!(sl!(), "resource with memory {}", self.resource.mem_mb); + if config.runtime.static_sandbox_resource_mgmt { + hv.memory_info.default_memory += self.resource.mem_mb; + if hv.memory_info.default_maxmemory < hv.memory_info.default_memory { + hv.memory_info.default_maxmemory = hv.memory_info.default_memory; + } + } } Ok(()) } @@ -377,4 +385,111 @@ mod tests { ); } } + + fn make_config( + default_vcpus: f32, + default_maxvcpus: u32, + default_memory: u32, + default_maxmemory: u32, + static_sandbox_resource_mgmt: bool, + ) -> TomlConfig { + use kata_types::config::Hypervisor; + + let mut config = TomlConfig::default(); + config + .hypervisor + .insert("qemu".to_owned(), Hypervisor::default()); + config.hypervisor.entry("qemu".to_owned()).and_modify(|hv| { + hv.cpu_info.default_vcpus = default_vcpus; + hv.cpu_info.default_maxvcpus = default_maxvcpus; + hv.memory_info.default_memory = default_memory; + hv.memory_info.default_maxmemory = default_maxmemory; + }); + config.runtime.hypervisor_name = "qemu".to_owned(); + config.runtime.static_sandbox_resource_mgmt = static_sandbox_resource_mgmt; + config + } + + #[test] + fn test_setup_config_static_applies_vcpu_and_memory() { + let mut config = make_config(1.0, 4, 256, 4096, true); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 1.2, + mem_mb: 512, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + let hv = config.hypervisor.get("qemu").unwrap(); + assert_eq!(hv.cpu_info.default_vcpus, 2.2); + 
assert_eq!(hv.memory_info.default_memory, 768); + } + + #[test] + fn test_setup_config_non_static_does_not_apply() { + let mut config = make_config(1.0, 4, 256, 4096, false); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 1.2, + mem_mb: 512, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + let hv = config.hypervisor.get("qemu").unwrap(); + assert_eq!(hv.cpu_info.default_vcpus, 1.0); + assert_eq!(hv.memory_info.default_memory, 256); + } + + #[test] + fn test_setup_config_clamps_maxvcpus() { + let mut config = make_config(1.0, 2, 256, 4096, true); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 2.5, + mem_mb: 0, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + let hv = config.hypervisor.get("qemu").unwrap(); + assert_eq!(hv.cpu_info.default_vcpus, 3.5); + assert_eq!(hv.cpu_info.default_maxvcpus, 4); + } + + #[test] + fn test_setup_config_clamps_maxmemory() { + let mut config = make_config(1.0, 4, 256, 300, true); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 0.0, + mem_mb: 512, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + let hv = config.hypervisor.get("qemu").unwrap(); + assert_eq!(hv.memory_info.default_memory, 768); + assert_eq!(hv.memory_info.default_maxmemory, 768); + } + + #[test] + fn test_setup_config_preserves_orig_toml_default_mem() { + let mut config = make_config(1.0, 4, 256, 4096, true); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 0.0, + mem_mb: 128, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + assert_eq!(mgr.get_orig_toml_default_mem(), 256); + } } From 99043b2a29ee4b0b9ddc3b75ff826bcf42f41348 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Fri, 8 May 2026 11:05:23 -0700 Subject: [PATCH 32/36] runtime-rs: add test coverage for static resource management If using static management and initial size manager uses 
0 for CPU or memory, we add default static values to the hv config Signed-off-by: Saul Paredes --- .../resource/src/cpu_mem/initial_size.rs | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs index 927dbdf4213c..1b6b312ad160 100644 --- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs +++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs @@ -492,4 +492,27 @@ mod tests { mgr.setup_config(&mut config).unwrap(); assert_eq!(mgr.get_orig_toml_default_mem(), 256); } + + #[test] + fn test_setup_config_static_uses_default_workload_resources_when_unset() { + let mut config = make_config(1.0, 2, 256, 300, true); + config.runtime.static_sandbox_default_workload_vcpus = 1.5; + config.runtime.static_sandbox_default_workload_mem = 512; + + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 0.0, + mem_mb: 0, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + + let hv = config.hypervisor.get("qemu").unwrap(); + assert_eq!(hv.cpu_info.default_vcpus, 2.5); + assert_eq!(hv.cpu_info.default_maxvcpus, 3); + assert_eq!(hv.memory_info.default_memory, 768); + assert_eq!(hv.memory_info.default_maxmemory, 768); + } } From ef5c908094b1bd071bdb51cf4f480944042264e0 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Tue, 12 May 2026 15:11:49 -0700 Subject: [PATCH 33/36] runtime-rs: static resources: always set maxvcpus equal to vcpus based on current runtime-go behaviour introduced in https://github.com/kata-containers/kata-containers/pull/9195 When using static resources, always set maxvcpus value equal to the vcpus value. This is because the static resources case does not support dynamic CPU hotplugging, and therefore the maximum number of vCPUs should be limited to the number of vCPUs. Booting with a high number of max vCPUs is a bit slower compared to a lower number. 
Signed-off-by: Saul Paredes --- .../resource/src/cpu_mem/initial_size.rs | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs index 1b6b312ad160..d1d0ad503afe 100644 --- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs +++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs @@ -154,13 +154,14 @@ impl InitialSizeManager { info!(sl!(), "resource with vcpu {}", self.resource.vcpu); if config.runtime.static_sandbox_resource_mgmt { hv.cpu_info.default_vcpus += self.resource.vcpu; - let new_vcpus_ceil = hv.cpu_info.default_vcpus.ceil() as u32; - if hv.cpu_info.default_maxvcpus < new_vcpus_ceil { - hv.cpu_info.default_maxvcpus = new_vcpus_ceil; - } } } + if config.runtime.static_sandbox_resource_mgmt { + let new_vcpus_ceil = hv.cpu_info.default_vcpus.ceil() as u32; + hv.cpu_info.default_maxvcpus = new_vcpus_ceil; + } + self.resource.orig_toml_default_mem = hv.memory_info.default_memory; if self.resource.mem_mb > 0 { info!(sl!(), "resource with memory {}", self.resource.mem_mb); @@ -461,6 +462,23 @@ mod tests { assert_eq!(hv.cpu_info.default_maxvcpus, 4); } + #[test] + fn test_setup_config_static_reduces_maxvcpus_to_static_total() { + let mut config = make_config(1.0, 8, 256, 4096, true); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 1.2, + mem_mb: 0, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + let hv = config.hypervisor.get("qemu").unwrap(); + assert_eq!(hv.cpu_info.default_vcpus, 2.2); + assert_eq!(hv.cpu_info.default_maxvcpus, 3); + } + #[test] fn test_setup_config_clamps_maxmemory() { let mut config = make_config(1.0, 4, 256, 300, true); From 5e58fabafcb441b7d25fdd8e174976b11c6f12f4 Mon Sep 17 00:00:00 2001 From: Saul Paredes Date: Sun, 17 May 2026 11:38:23 -0700 Subject: [PATCH 34/36] required-tests: disable tests incompatible with 
runtime-rs Due to importing resource management patches to runtime-rs, these tests: - run-nerdctl-tests (dragonball) - run-nydus (active, dragonball) - run-nydus (lts, dragonball) Are failing with: vmm action error: MachineConfig(InvalidVcpuCount(0)) Signed-off-by: Saul Paredes --- tools/testing/gatekeeper/required-tests.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/gatekeeper/required-tests.yaml b/tools/testing/gatekeeper/required-tests.yaml index c04473368bb3..77bc963e595b 100644 --- a/tools/testing/gatekeeper/required-tests.yaml +++ b/tools/testing/gatekeeper/required-tests.yaml @@ -54,14 +54,17 @@ mapping: - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (lts, qemu-runtime-rs) - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-kata-agent-apis - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (cloud-hypervisor) - - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (dragonball) + # msft-preview: vmm action error: MachineConfig(InvalidVcpuCount(0)) + # - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (dragonball) - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (qemu) - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (qemu-runtime-rs) - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, clh) - - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, dragonball) + # msft-preview: vmm action error: MachineConfig(InvalidVcpuCount(0)) + # - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, dragonball) - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, qemu) - 
Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, clh) - - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, dragonball) + # msft-preview: vmm action error: MachineConfig(InvalidVcpuCount(0)) + # - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, dragonball) - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, qemu) - Kata Containers CI / kata-containers-ci-on-push / run-cri-containerd-amd64 (active, cloud-hypervisor) / run-cri-containerd-amd64 (active, cloud-hypervisor) # msft-preview: pod memory limit too low: minimum 128MiB, got 0MiB From 6bc70526d40da3150fc4950be6ecc75bd29a4dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Tue, 20 Jan 2026 14:58:55 -0600 Subject: [PATCH 35/36] runtime: Set `disable_guest_empty_dir = true` by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the runtime share the host Kubelet emptyDir folder with the guest instead of the agent creating an empty folder in the container rootfs. Doing so enables the Kubelet to track emptyDir usage and evict greedy pods. In other words, with virtio-fs the container rootfs uses host storage whether this is true or false, however with true, Kata uses the k8s emptyDir folder so the sizeLimit is properly enforced by k8s. Addresses the emptyDir part of kata-containers/kata-containers#12203. 
Signed-off-by: Aurélien Bombo --- src/runtime/Makefile | 2 +- src/runtime/virtcontainers/fs_share_linux.go | 27 +++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index cc006a20db58..53805b0008d6 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -220,7 +220,7 @@ DEFBRIDGES := 1 DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\"] DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"] DEFDISABLEGUESTSECCOMP := true -DEFDISABLEGUESTEMPTYDIR := false +DEFDISABLEGUESTEMPTYDIR := true #Default experimental features enabled DEFAULTEXPFEATURES := [] diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index 162d5ac532f0..a19fca05e32e 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -83,9 +83,13 @@ type FilesystemShare struct { configVolRegex *regexp.Regexp // Regex to match only the timestamped directory inside the k8's volume mount timestampDirRegex *regexp.Regexp - // The same volume mount can be shared by multiple containers in the same sandbox (pod) - srcDstMap map[string][]string - srcDstMapLock sync.Mutex + // srcDstMap tracks file-level source to destination mappings for configmap/secret watching + srcDstMap map[string][]string + srcDstMapLock sync.Mutex + // srcGuestMap caches volume source path to guest path, enabling multiple containers + // in the same pod to share the same volume mount + srcGuestMap map[string]string + srcGuestMapLock sync.Mutex eventLoopStarted bool eventLoopStartedLock sync.Mutex watcherDoneChannel chan bool @@ -108,6 +112,7 @@ func NewFilesystemShare(s *Sandbox) (*FilesystemShare, error) { sandbox: s, watcherDoneChannel: make(chan bool), srcDstMap: 
make(map[string][]string), + srcGuestMap: make(map[string]string), watcher: watcher, configVolRegex: configVolRegex, timestampDirRegex: timestampDirRegex, @@ -309,6 +314,13 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) // bind mount it in the shared directory. caps := f.sandbox.hypervisor.Capabilities(ctx) if !caps.IsFsSharingSupported() { + f.srcGuestMapLock.Lock() + if guestPath, ok := f.srcGuestMap[m.Source]; ok { + f.srcGuestMapLock.Unlock() + return &SharedFile{guestPath: guestPath}, nil + } + f.srcGuestMapLock.Unlock() + f.Logger().Debug("filesystem sharing is not supported, files will be copied") var ignored bool @@ -418,6 +430,11 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) m.HostPath = mountDest } + // Cache the guestPath for this volume source so other containers can share it + f.srcGuestMapLock.Lock() + defer f.srcGuestMapLock.Unlock() + f.srcGuestMap[m.Source] = guestPath + return &SharedFile{ guestPath: guestPath, }, nil @@ -442,6 +459,10 @@ func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Moun } } + f.srcGuestMapLock.Lock() + delete(f.srcGuestMap, m.Source) + f.srcGuestMapLock.Unlock() + return nil } From e2669e84b321bdcdf433ca9ebce6cab48494ca04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Tue, 26 May 2026 11:11:06 -0500 Subject: [PATCH 36/36] runtime-rs: Set disable_guest_empty_dir = true by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Porting the runtime-go change. 
Signed-off-by: Aurélien Bombo --- src/runtime-rs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 2ba05e1d4a52..719ebf402db4 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -187,7 +187,7 @@ DEFNETQUEUES := 1 DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\"] DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"] DEFDISABLEGUESTSECCOMP := true -DEFDISABLEGUESTEMPTYDIR := false +DEFDISABLEGUESTEMPTYDIR := true ##VAR DEFAULTEXPFEATURES=[features] Default experimental features enabled DEFAULTEXPFEATURES := [] DEFDISABLESELINUX := false