From 8c821542fcb82d03d12b72bf3cb041d099b6ac90 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Mon, 4 May 2026 08:40:32 +0000 Subject: [PATCH 01/17] Implement SaveVM and PauseVM fns. for CLH The CLH impl of Hypervisor iface was missing Save and Pause functions that are crucial for creating a template. This commit implements those functions and snapshot is getting saved when the template initialization finishes. Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh.go | 44 +++++++++++++++++++++++++- src/runtime/virtcontainers/clh_test.go | 10 ++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 3268492eded8..a19f5531216e 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -112,6 +112,10 @@ type clhClient interface { VmAddDevicePut(ctx context.Context, deviceConfig chclient.DeviceConfig) (chclient.PciDeviceInfo, *http.Response, error) // Add a new disk device to the VM VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error) + // Pause the VM + VmPausePut(ctx context.Context) (*http.Response, error) + // Create a snapshot of the VM + VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) // Remove a device from the VM VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) } @@ -153,6 +157,14 @@ func (c *clhClientApi) VmAddDiskPut(ctx context.Context, diskConfig chclient.Dis return c.ApiInternal.VmAddDiskPut(ctx).DiskConfig(diskConfig).Execute() } +func (c *clhClientApi) VmPausePut(ctx context.Context) (*http.Response, error) { + return c.ApiInternal.PauseVM(ctx).Execute() +} + +func (c *clhClientApi) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) { + return 
c.ApiInternal.VmSnapshotPut(ctx).VmSnapshotConfig(vmSnapshotConfig).Execute() +} + func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) { return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute() } @@ -1284,11 +1296,41 @@ func (clh *cloudHypervisor) Cleanup(ctx context.Context) error { func (clh *cloudHypervisor) PauseVM(ctx context.Context) error { clh.Logger().WithField("function", "PauseVM").Info("Pause Sandbox") + + cl := clh.client() + ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) + defer cancel() + + _, err := cl.VmPausePut(ctx) + if err != nil { + clh.Logger().WithError(err).Error("Failed to pause VM") + return openAPIClientError(err) + } + return nil } func (clh *cloudHypervisor) SaveVM() error { - clh.Logger().WithField("function", "saveSandboxC").Info("Save Sandbox") + clh.Logger().WithField("function", "SaveVM").Info("Save Sandbox") + + cl := clh.client() + ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) + defer cancel() + + // Create snapshot config with file URL to template path + // Use MemoryPath as base for snapshot destination + // When creating a template, the MemoryPath is set to the template path, so we can use it to save the snapshot. 
+ fileURL := "file://" + filepath.Dir(clh.config.MemoryPath) + + vmSnapshotConfig := *chclient.NewVmSnapshotConfig() + vmSnapshotConfig.SetDestinationUrl(fileURL) + + _, err := cl.VmSnapshotPut(ctx, vmSnapshotConfig) + if err != nil { + clh.Logger().WithError(err).Error("Failed to save VM snapshot") + return openAPIClientError(err) + } + return nil } diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 5849ed76fbb2..b42c1e391ebc 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -115,6 +115,16 @@ func (c *clhClientMock) VmAddDiskPut(ctx context.Context, diskConfig chclient.Di return chclient.PciDeviceInfo{Bdf: "0000:00:0a.0"}, nil, nil } +//nolint:golint +func (c *clhClientMock) VmPausePut(ctx context.Context) (*http.Response, error) { + return nil, nil +} + +//nolint:golint +func (c *clhClientMock) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) { + return nil, nil +} + //nolint:golint func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) { return nil, nil From 5777b04a6570156badf37536e1e5bc3272ef5463 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Thu, 7 May 2026 18:39:25 +0000 Subject: [PATCH 02/17] Implement VM Restore functionality for CLH Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh.go | 60 ++++++++++++++++++++++++++ src/runtime/virtcontainers/clh_test.go | 56 +++++++++++++++++++++++- 2 files changed, 115 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index a19f5531216e..19d7fa261610 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -68,6 +68,7 @@ const ( const ( clhStateCreated = "Created" clhStateRunning = "Running" + clhStatePaused = "Paused" ) const ( @@ -84,6 +85,7 @@ const ( clhHotPlugAPITimeout = 5 
clhStopSandboxTimeout = 3 clhStopSandboxTimeoutConfidentialGuest = 10 + clhRestoreTimeout = 2 clhSocket = "clh.sock" clhAPISocket = "clh-api.sock" virtioFsSocket = "virtiofsd.sock" @@ -118,6 +120,8 @@ type clhClient interface { VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) // Remove a device from the VM VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) + // Restore VM from a snapshot + VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) } type clhClientApi struct { @@ -169,6 +173,10 @@ func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chc return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute() } +func (c *clhClientApi) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) { + return c.ApiInternal.VmRestorePut(ctx).RestoreConfig(restoreConfig).Execute() +} + // This is done in order to be able to override such a function as part of // our unit tests, as when testing bootVM we're on a mocked scenario already. 
var vmAddNetPutRequest = func(clh *cloudHypervisor) ([]chclient.PciDeviceInfo, error) { @@ -1783,6 +1791,58 @@ func (clh *cloudHypervisor) bootVM(ctx context.Context) error { return nil } +func (clh *cloudHypervisor) restoreVM(ctx context.Context) error { + clh.Logger().Info("Restoring VM from template") + + cl := clh.client() + + // use the VMStorePath as the base for the restore source URL + snapshotDir := clh.config.VMStorePath + + // check if the snapshot directory contains the state.json and config.json files + // which contain the VM state and configuration respectively + stateFile := filepath.Join(snapshotDir, "state.json") + configFile := filepath.Join(snapshotDir, "config.json") + + if _, err := os.Stat(stateFile); err != nil { + return fmt.Errorf("Failed to access state file %s: %v", stateFile, err) + } + + if _, err := os.Stat(configFile); err != nil { + return fmt.Errorf("Failed to access config file %s: %v", configFile, err) + } + + // Prepare restore configuration + sourceURL := "file://" + snapshotDir + restoreConfig := *chclient.NewRestoreConfig(sourceURL) + + clh.Logger().WithField("sourceURL", sourceURL).Debug("Restore configuration") + + // Restore VM from template + ctxWithTimeout, cancelRestore := context.WithTimeout(ctx, clhRestoreTimeout*time.Second) + defer cancelRestore() + _, err := cl.VmRestorePut(ctxWithTimeout, restoreConfig) + if err != nil { + clh.Logger().WithError(err).Error("Failed to restore VM from template") + return openAPIClientError(err) + } + + // Check VM state after restoration + info, err := clh.vmInfo() + if err != nil { + return err + } + + clh.Logger().Debugf("VM state after restore: %#v", info) + + if info.State != clhStatePaused { + clh.Logger().Warnf("VM state is '%s' after restore, expected 'Paused'", info.State) + } + + clh.Logger().Info("Successfully restored VM from template") + return nil +} + func (clh *cloudHypervisor) addVSock(cid int64, path string) { clh.Logger().WithFields(log.Fields{ "path": path, diff 
--git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index b42c1e391ebc..c4608ebe2374 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -74,7 +74,8 @@ func newClhConfig() (HypervisorConfig, error) { } type clhClientMock struct { - vmInfo chclient.VmInfo + vmInfo chclient.VmInfo + restoreRequest *chclient.RestoreConfig } func (c *clhClientMock) VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error) { @@ -130,6 +131,13 @@ func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice ch return nil, nil } +func (c *clhClientMock) VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) { + c.restoreRequest = &restoreConfig + // restoreVM() verifies Paused after restore. + c.vmInfo.State = clhStatePaused + return nil, nil +} + func TestCloudHypervisorAddVSock(t *testing.T) { assert := assert.New(t) clh := cloudHypervisor{} @@ -526,6 +534,52 @@ func TestClhCreateVM(t *testing.T) { } } +func TestClhRestoreVM(t *testing.T) { + assert := assert.New(t) + + store, err := persist.GetDriver() + assert.NoError(err) + + clhConfig, err := newClhConfig() + assert.NoError(err) + clhConfig.VMStorePath = store.RunVMStoragePath() + clhConfig.RunStorePath = store.RunStoragePath() + + mockClient := &clhClientMock{} + clh := &cloudHypervisor{ + config: clhConfig, + APIClient: mockClient, + } + + // First call restoreVM without the VM snapshot files (state.json, config.json) present. + err = clh.restoreVM(context.Background()) + // An error is expected because restoreVM expects the VM snapshot files to be present. + assert.Error(err) + assert.Contains(err.Error(), filepath.Join(clhConfig.VMStorePath, "state.json")) + + // Now create the VM snapshot files and call restoreVM again. 
+ os.MkdirAll(clhConfig.VMStorePath, os.ModePerm) + stateFile := filepath.Join(clhConfig.VMStorePath, "state.json") + configFile := filepath.Join(clhConfig.VMStorePath, "config.json") + err = os.WriteFile(stateFile, []byte("{}"), 0o600) + assert.NoError(err) + err = os.WriteFile(configFile, []byte("{}"), 0o600) + assert.NoError(err) + + // Call restoreVM again, this time it should succeed. + err = clh.restoreVM(context.Background()) + assert.NoError(err) + + if assert.NotNil(mockClient.restoreRequest) { + expectedSourceURL := "file://" + clhConfig.VMStorePath + assert.Equal(expectedSourceURL, mockClient.restoreRequest.GetSourceUrl()) + } + + info, err := clh.vmInfo() + assert.NoError(err) + assert.Equal(clhStatePaused, info.State) +} + func TestCloudHypervisorStartSandbox(t *testing.T) { assert := assert.New(t) clhConfig, err := newClhConfig() From 45ec1d9a85c20ed6f1de3f6e9bf1832567973de5 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Thu, 7 May 2026 18:42:07 +0000 Subject: [PATCH 03/17] Implement ResumeVM fn. for CLH Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh.go | 16 ++++++++++++++++ src/runtime/virtcontainers/clh_test.go | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 19d7fa261610..3f11d3fb0fe6 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -122,6 +122,8 @@ type clhClient interface { VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) // Restore VM from a snapshot VmRestorePut(ctx context.Context, restoreConfig chclient.RestoreConfig) (*http.Response, error) + // Resume a paused VM + ResumeVM(ctx context.Context) (*http.Response, error) } type clhClientApi struct { @@ -177,6 +179,10 @@ func (c *clhClientApi) VmRestorePut(ctx context.Context, restoreConfig chclient. 
return c.ApiInternal.VmRestorePut(ctx).RestoreConfig(restoreConfig).Execute() } +func (c *clhClientApi) ResumeVM(ctx context.Context) (*http.Response, error) { + return c.ApiInternal.ResumeVM(ctx).Execute() +} + // This is done in order to be able to override such a function as part of // our unit tests, as when testing bootVM we're on a mocked scenario already. var vmAddNetPutRequest = func(clh *cloudHypervisor) ([]chclient.PciDeviceInfo, error) { @@ -1344,6 +1350,16 @@ func (clh *cloudHypervisor) SaveVM() error { func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error { clh.Logger().WithField("function", "ResumeVM").Info("Resume Sandbox") + cl := clh.client() + ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) + defer cancel() + + _, err := cl.ResumeVM(ctx) + if err != nil { + clh.Logger().WithError(err).Error("Failed to resume VM") + return openAPIClientError(err) + } + return nil } diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index c4608ebe2374..c38e317fcac2 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -138,6 +138,10 @@ func (c *clhClientMock) VmRestorePut(ctx context.Context, restoreConfig chclient return nil, nil } +func (c *clhClientMock) ResumeVM(ctx context.Context) (*http.Response, error) { + return nil, nil +} + func TestCloudHypervisorAddVSock(t *testing.T) { assert := assert.New(t) clh := cloudHypervisor{} From 83fc837ffa478124c9b11f088ffe678d021f30d8 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Thu, 7 May 2026 14:47:56 +0000 Subject: [PATCH 04/17] Set the vmInfo state after mock Pause and Resume calls Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index c38e317fcac2..cbb3823a6ada 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ 
b/src/runtime/virtcontainers/clh_test.go @@ -118,6 +118,7 @@ func (c *clhClientMock) VmAddDiskPut(ctx context.Context, diskConfig chclient.Di //nolint:golint func (c *clhClientMock) VmPausePut(ctx context.Context) (*http.Response, error) { + c.vmInfo.State = clhStatePaused return nil, nil } @@ -139,6 +140,7 @@ func (c *clhClientMock) VmRestorePut(ctx context.Context, restoreConfig chclient } func (c *clhClientMock) ResumeVM(ctx context.Context) (*http.Response, error) { + c.vmInfo.State = clhStateRunning return nil, nil } From c06639321f9edd676ed941c838e17f8347806c95 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Thu, 7 May 2026 18:29:09 +0000 Subject: [PATCH 05/17] Add test for SaveVM function Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh_test.go | 34 ++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index cbb3823a6ada..46f3ff0e9f07 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -74,8 +74,9 @@ func newClhConfig() (HypervisorConfig, error) { } type clhClientMock struct { - vmInfo chclient.VmInfo - restoreRequest *chclient.RestoreConfig + vmInfo chclient.VmInfo + restoreRequest *chclient.RestoreConfig + snapshotRequest *chclient.VmSnapshotConfig } func (c *clhClientMock) VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error) { @@ -124,6 +125,7 @@ func (c *clhClientMock) VmPausePut(ctx context.Context) (*http.Response, error) //nolint:golint func (c *clhClientMock) VmSnapshotPut(ctx context.Context, vmSnapshotConfig chclient.VmSnapshotConfig) (*http.Response, error) { + c.snapshotRequest = &vmSnapshotConfig return nil, nil } @@ -586,6 +588,34 @@ func TestClhRestoreVM(t *testing.T) { assert.Equal(clhStatePaused, info.State) } +func TestClhSaveVM(t *testing.T) { + assert := assert.New(t) + + store, err := persist.GetDriver() + assert.NoError(err) + + 
clhConfig, err := newClhConfig() + assert.NoError(err) + // For testing, assume the memory path is located within the VM store path. + clhConfig.MemoryPath = filepath.Join(store.RunVMStoragePath(), "memory") + clhConfig.VMStorePath = store.RunVMStoragePath() + clhConfig.RunStorePath = store.RunStoragePath() + + mockClient := &clhClientMock{} + clh := &cloudHypervisor{ + config: clhConfig, + APIClient: mockClient, + } + + err = clh.SaveVM() + assert.NoError(err) + + if assert.NotNil(mockClient.snapshotRequest) { + expectedDestinationURL := "file://" + filepath.Dir(clhConfig.MemoryPath) + assert.Equal(expectedDestinationURL, mockClient.snapshotRequest.GetDestinationUrl()) + } +} + func TestCloudHypervisorStartSandbox(t *testing.T) { assert := assert.New(t) clhConfig, err := newClhConfig() From bc65ce6eb395219fdd39360ff38cb32f244583cb Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Mon, 4 May 2026 03:10:18 +0000 Subject: [PATCH 06/17] Remove initrd image check for VM templating Although the official Kata docs require an initrd image for VM templating, the overlay upper layer is created in memory even with a root disk image, and is therefore captured in the VM snapshot. Hence the initrd image constraint does not apply to VM templating. Update the test TestCheckFactoryConfig to not expect an error when template is enabled and a RootFS image is specified. Signed-off-by: Harshit Gupta --- src/runtime/pkg/katautils/config.go | 6 ------ src/runtime/pkg/katautils/config_test.go | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 184556305210..95b1e4547273 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -1958,12 +1958,6 @@ func checkNetNsConfig(config oci.RuntimeConfig) error { // checkFactoryConfig ensures the VM factory configuration is valid. 
func checkFactoryConfig(config oci.RuntimeConfig) error { - if config.FactoryConfig.Template { - if config.HypervisorConfig.InitrdPath == "" { - return errors.New("Factory option enable_template requires an initrd image") - } - } - if config.FactoryConfig.VMCacheNumber > 0 { if config.HypervisorType != vc.QemuHypervisor { return errors.New("VM cache just support qemu") diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 1415168976ce..23621a367a75 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -1626,7 +1626,7 @@ func TestCheckFactoryConfig(t *testing.T) { {false, false, "", "initrd"}, {true, false, "", "initrd"}, - {true, true, "image", ""}, + {true, false, "image", ""}, } for i, d := range data { From 4a0f124e7133eb6bf05799f97ad424e53b856f51 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Tue, 12 May 2026 18:27:08 +0000 Subject: [PATCH 07/17] Truncate VM Template's memory file Create the VM Template's memory file as an empty file with size equal to that of the VM's memory size. 
Signed-off-by: Harshit Gupta --- .../virtcontainers/factory/template/template_linux.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/runtime/virtcontainers/factory/template/template_linux.go b/src/runtime/virtcontainers/factory/template/template_linux.go index d48ce5c50bf9..2d1d259265fc 100644 --- a/src/runtime/virtcontainers/factory/template/template_linux.go +++ b/src/runtime/virtcontainers/factory/template/template_linux.go @@ -115,6 +115,15 @@ func (t *template) prepareTemplateFiles() error { } f.Close() + // truncate the memory file to the exact size of the VM memory + memoryInBytes := int64(t.config.HypervisorConfig.MemorySize) * 1024 * 1024 + t.Logger().Infof("truncating memory file %s to %d bytes", t.statePath+"/memory", memoryInBytes) + err = os.Truncate(t.statePath+"/memory", memoryInBytes) + if err != nil { + t.close() + return err + } + return nil } From 0417a3ac9d49c27da5c931b4dff944db02edf8ea Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Mon, 4 May 2026 09:12:19 +0000 Subject: [PATCH 08/17] Set MemoryZone config for Template-related VMs When VM is booted to be template or booted from template, its memory is to be backed by a file. This commit updates the memory config to use a file in both cases. 
Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh.go | 73 +++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 3f11d3fb0fe6..75957503ebc8 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -585,28 +585,57 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } } - // Create the VM memory config via the constructor to ensure default values are properly assigned - clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) - // Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs - // or when using HugePages. - // If such features are disabled, turn off shared memory config. - if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages { - clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false) - } else { - clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) - } - // Enable hugepages if needed - clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages) - if !clh.config.ConfidentialGuest { - hotplugSize := clh.config.DefaultMaxMemorySize - // OpenAPI only supports int64 values - clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) - - if clh.config.ReclaimGuestFreedMemory { - // Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero) - clh.vmconfig.Balloon = chclient.NewBalloonConfig(0) - // Set the free page reporting flag for ballooning to be true - clh.vmconfig.Balloon.SetFreePageReporting(true) + // If the VM is booting from a template, or if the VM is going to be used as a template + // the memory is to be backed by a file, so we need to configure the memory zones accordingly. 
+ if clh.config.BootFromTemplate || clh.config.BootToBeTemplate { + // Double-check that the clh.config.MemoryPath file exists before using it in the VM config, to avoid hitting a less clear error from cloud hypervisor when it tries to access the non-existing memory file. + if _, err := os.Stat(clh.config.MemoryPath); os.IsNotExist(err) { + return fmt.Errorf("memory file %s does not exist", clh.config.MemoryPath) + } + + // Set the size to be 0 since we are going to configure actual size via zones + clh.vmconfig.Memory = chclient.NewMemoryConfig(0) + + memoryZoneConfig := chclient.NewMemoryZoneConfig("mem0", int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) + if clh.config.BootToBeTemplate { + // When BootToBeTemplate is true, the memory file backing the VM memory is shared between multiple VMs created from the same template. + // So we need to set shared to true in this case. + memoryZoneConfig.SetShared(true) + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) + } else { + // When BootFromTemplate is true, set shared=false to ensure Copy-On-Write is used for the memory file. + // So that the VM can have its own private memory. + memoryZoneConfig.SetShared(false) + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false) + } + memoryZoneConfig.SetFile(clh.config.MemoryPath) + clh.vmconfig.Memory.Zones = &[]chclient.MemoryZoneConfig{ + *memoryZoneConfig, + } + } else { // Normal (non-template) VM creation + // Create the VM memory config via the constructor to ensure default values are properly assigned + clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) + // Memory config shared is to be enabled when using vhost_user backends, ex. virtio-fs + // or when using HugePages. + // If such features are disabled, turn off shared memory config. 
+ if clh.config.SharedFS == config.NoSharedFS && !clh.config.HugePages { + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false) + } else { + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) + } + // Enable hugepages if needed + clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages) + if !clh.config.ConfidentialGuest { + hotplugSize := clh.config.DefaultMaxMemorySize + // OpenAPI only supports int64 values + clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes())) + + if clh.config.ReclaimGuestFreedMemory { + // Create VM with a balloon config so we can enable free page reporting (size of the balloon can be set to zero) + clh.vmconfig.Balloon = chclient.NewBalloonConfig(0) + // Set the free page reporting flag for ballooning to be true + clh.vmconfig.Balloon.SetFreePageReporting(true) + } } } From 277f7e2482c0024a63e63c12850515ce21a0c738 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Tue, 12 May 2026 19:57:29 +0000 Subject: [PATCH 09/17] Set VMStorePath for the template VM The template VM is created with the default value for VMStorePath, causing the runtime to be unable to reach the CLH VM's API socket. This commit sets the VMStorePath to be equal to the VM's statePath, which is set to the `factory.template_path` config parameter. 
Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/factory/template/template_linux.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/virtcontainers/factory/template/template_linux.go b/src/runtime/virtcontainers/factory/template/template_linux.go index 2d1d259265fc..d82fbe4104b2 100644 --- a/src/runtime/virtcontainers/factory/template/template_linux.go +++ b/src/runtime/virtcontainers/factory/template/template_linux.go @@ -134,6 +134,7 @@ func (t *template) createTemplateVM(ctx context.Context) error { config.HypervisorConfig.BootFromTemplate = false config.HypervisorConfig.MemoryPath = t.statePath + "/memory" config.HypervisorConfig.DevicesStatePath = t.statePath + "/state" + config.HypervisorConfig.VMStorePath = t.statePath vm, err := vc.NewVM(ctx, config) if err != nil { From a03fb3494922a2ece59ed30cc67009dcbf683bb0 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Tue, 12 May 2026 20:33:58 +0000 Subject: [PATCH 10/17] Make template deviceStatePath configurable Make deviceStatePath calculation in VM template workflow configurable based on hypervisor, instead of hardcoding it to `state`. 
Signed-off-by: Harshit Gupta --- .../factory/template/template_linux.go | 16 +++++++++++++--- .../factory/template/template_test.go | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/factory/template/template_linux.go b/src/runtime/virtcontainers/factory/template/template_linux.go index d82fbe4104b2..2a23cdbccd43 100644 --- a/src/runtime/virtcontainers/factory/template/template_linux.go +++ b/src/runtime/virtcontainers/factory/template/template_linux.go @@ -11,6 +11,7 @@ import ( "context" "fmt" "os" + "path/filepath" "syscall" "time" @@ -133,7 +134,7 @@ func (t *template) createTemplateVM(ctx context.Context) error { config.HypervisorConfig.BootToBeTemplate = true config.HypervisorConfig.BootFromTemplate = false config.HypervisorConfig.MemoryPath = t.statePath + "/memory" - config.HypervisorConfig.DevicesStatePath = t.statePath + "/state" + config.HypervisorConfig.DevicesStatePath = t.deviceStatePath() config.HypervisorConfig.VMStorePath = t.statePath vm, err := vc.NewVM(ctx, config) @@ -171,7 +172,7 @@ func (t *template) createFromTemplateVM(ctx context.Context, c vc.VMConfig) (*vc config.HypervisorConfig.BootToBeTemplate = false config.HypervisorConfig.BootFromTemplate = true config.HypervisorConfig.MemoryPath = t.statePath + "/memory" - config.HypervisorConfig.DevicesStatePath = t.statePath + "/state" + config.HypervisorConfig.DevicesStatePath = t.deviceStatePath() config.HypervisorConfig.SharedPath = c.HypervisorConfig.SharedPath config.HypervisorConfig.VMStorePath = c.HypervisorConfig.VMStorePath config.HypervisorConfig.RunStorePath = c.HypervisorConfig.RunStorePath @@ -185,6 +186,15 @@ func (t *template) checkTemplateVM() error { return err } - _, err = os.Stat(t.statePath + "/state") + _, err = os.Stat(t.deviceStatePath()) return err } + +func (t *template) deviceStatePath() string { + stateFileName := "state" + if t.config.HypervisorType == vc.ClhHypervisor { + stateFileName = "state.json" + } + + return 
filepath.Join(t.statePath, stateFileName) +} diff --git a/src/runtime/virtcontainers/factory/template/template_test.go b/src/runtime/virtcontainers/factory/template/template_test.go index c067c793e642..75d9733cc093 100644 --- a/src/runtime/virtcontainers/factory/template/template_test.go +++ b/src/runtime/virtcontainers/factory/template/template_test.go @@ -87,7 +87,7 @@ func TestTemplateFactory(t *testing.T) { err = tt.checkTemplateVM() assert.Error(err) - _, err = os.Create(tt.statePath + "/state") + _, err = os.Create(tt.deviceStatePath()) assert.Nil(err) err = tt.checkTemplateVM() assert.Nil(err) From cbae2e6efd577d788b4106eb86c99a8f6777a9b3 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Tue, 12 May 2026 20:40:49 +0000 Subject: [PATCH 11/17] Expand resetHypervisorConfig Expand the scope of the resetHypervisorConfig function to include resetting sandbox name, namespace and the default max vcpus. Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/factory/factory_linux.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/runtime/virtcontainers/factory/factory_linux.go b/src/runtime/virtcontainers/factory/factory_linux.go index 4a0cfcfe38e2..c010916943f8 100644 --- a/src/runtime/virtcontainers/factory/factory_linux.go +++ b/src/runtime/virtcontainers/factory/factory_linux.go @@ -80,6 +80,9 @@ func resetHypervisorConfig(config *vc.VMConfig) { config.HypervisorConfig.SharedPath = "" config.HypervisorConfig.VMStorePath = "" config.HypervisorConfig.RunStorePath = "" + config.HypervisorConfig.SandboxName = "" + config.HypervisorConfig.SandboxNamespace = "" + config.HypervisorConfig.DefaultMaxVCPUs = 0 } // It's important that baseConfig and newConfig are passed by value! From 486490c383876ba0ddb94310ec86f16e1ae18b8c Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Mon, 4 May 2026 10:24:48 +0000 Subject: [PATCH 12/17] Update CLH template VM config to set memory shared=false Update config.json for the CLH VM template to set memory shared=false. 
This forces the VMs created from the template to trigger CoW. Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh.go | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 75957503ebc8..aca8216b4e32 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1374,6 +1374,56 @@ func (clh *cloudHypervisor) SaveVM() error { return openAPIClientError(err) } + if clh.config.BootToBeTemplate { + // Update the config.json file in the snapshotDir to set memory shared=false + snapshotConfigPath := filepath.Join(snapshotDir, "config.json") + snapshotConfig, err := os.ReadFile(snapshotConfigPath) + if err != nil { + clh.Logger().WithError(err).Error("Failed to read snapshot config") + return err + } + + var snapshotConfigData map[string]interface{} + if err := json.Unmarshal(snapshotConfig, &snapshotConfigData); err != nil { + clh.Logger().WithError(err).Error("Failed to unmarshal snapshot config") + return err + } + + // Access the memory section and cast it to a map + if memorySection, ok := snapshotConfigData["memory"].(map[string]interface{}); ok { + memorySection["shared"] = false + // Do the same update for each element of the "zones" array in the memorySection + if zones, ok := memorySection["zones"].([]interface{}); ok { + for _, zone := range zones { + if zoneMap, ok := zone.(map[string]interface{}); ok { + zoneMap["shared"] = false + } else { + clh.Logger().Error("Unable to access zone in snapshot config memory section") + return fmt.Errorf("invalid snapshot config structure: zone in memory section not found or invalid") + } + } + } else { + clh.Logger().Error("Unable to access zones array in snapshot config memory section") + return fmt.Errorf("invalid snapshot config structure: zones array in memory section not found or invalid") + } + } else { + clh.Logger().Error("Unable to access memory section in snapshot config") + 
return fmt.Errorf("invalid snapshot config structure: memory section not found or invalid") + } + + // Write the modified config back to file + modifiedConfig, err := json.Marshal(snapshotConfigData) + if err != nil { + clh.Logger().WithError(err).Error("Failed to marshal modified snapshot config") + return err + } + + if err := os.WriteFile(snapshotConfigPath, modifiedConfig, 0644); err != nil { + clh.Logger().WithError(err).Error("Failed to write modified snapshot config") + return err + } + } + return nil } From 08fff8ef0ab553595877c662e4685859ba2c4c8b Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Tue, 12 May 2026 21:30:29 +0000 Subject: [PATCH 13/17] Restore CLH VM snapshot to create VM from template Add logic to restore VM from snapshot instead of starting new VM when VM template is enabled. Copy the config.json and state.json to the new VM's VmStorePath, and update the config to create the VSOCK device for Kata agent in the VmStorePath. Signed-off-by: Harshit Gupta --- src/runtime/virtcontainers/clh.go | 143 ++++++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 8 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index aca8216b4e32..87c7c6ce4433 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -281,12 +281,14 @@ type CloudHypervisorState struct { PID int VirtiofsDaemonPid int state clhState + isRestoring bool } func (s *CloudHypervisorState) reset() { s.PID = 0 s.VirtiofsDaemonPid = 0 s.state = clhNotReady + s.isRestoring = false } type cloudHypervisor struct { @@ -527,7 +529,7 @@ func getNonUserDefinedKernelParams(rootfstype string, disableNvdimm bool, dax bo } // For cloudHypervisor this call only sets the internal structure up. -// The VM will be created and started through StartVM(). +// The VM will be created and started through StartVM(), or restored from template if template files exist. 
func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { clh.ctx = ctx @@ -754,9 +756,104 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net return err } + // Check if we should restore from template instead of creating new VM + if clh.config.BootFromTemplate && clh.shouldRestoreFromTemplate() { + clh.Logger().Info("Template files found, will restore VM instead of creating new") + // Mark this as a restore operation for StartVM to use RestoreVM instead + clh.state.isRestoring = true + return nil + } + return nil } +// shouldRestoreFromTemplate checks if template snapshot files exist and we should restore instead of creating new VM +func (clh *cloudHypervisor) shouldRestoreFromTemplate() bool { + // For template restore, we need the snapshot directory to contain the necessary files + // The snapshotDir is derived from the MemoryPath directory + snapshotDir := filepath.Dir(clh.config.MemoryPath) + + // Check for required template files (config.json and memory file) + configFile := filepath.Join(snapshotDir, "config.json") + memoryFile := clh.config.MemoryPath + + if _, err := os.Stat(configFile); os.IsNotExist(err) { + clh.Logger().WithField("configFile", configFile).Debug("Template config file not found") + return false + } + + if _, err := os.Stat(memoryFile); os.IsNotExist(err) { + clh.Logger().WithField("memoryFile", memoryFile).Debug("Template memory file not found") + return false + } + + clh.Logger().WithFields(log.Fields{ + "configFile": configFile, + "memoryFile": memoryFile, + }).Info("Template files found, can restore VM from template") + + return true +} + +// copyFile copies a file from src to dst +func (clh *cloudHypervisor) copyFile(src, dst string) error { + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + _, err 
= io.Copy(dstFile, srcFile) + if err != nil { + return err + } + + return dstFile.Sync() +} + +// updateVsockSocketPath updates the vsock socket path in the config.json file +func (clh *cloudHypervisor) updateVsockSocketPath(configPath, vmID string) error { + // Read the config file + configData, err := os.ReadFile(configPath) + if err != nil { + return err + } + + var config map[string]interface{} + if err := json.Unmarshal(configData, &config); err != nil { + return err + } + + // Update vsock socket path if vsock exists + if vsock, ok := config["vsock"].(map[string]interface{}); ok { + // Generate new vsock socket path for this VM + newVsockPath, err := clh.vsockSocketPath(vmID) + if err != nil { + return err + } + vsock["socket"] = newVsockPath + + clh.Logger().WithFields(log.Fields{ + "vmID": vmID, + "newVsockPath": newVsockPath, + }).Debug("Updated vsock socket path in config.json") + } + + // Write the updated config back to file + updatedConfig, err := json.Marshal(config) + if err != nil { + return err + } + + return os.WriteFile(configPath, updatedConfig, 0644) +} + // setupInitdata prepares and attaches the initdata disk if present. 
func setupInitdata(clh *cloudHypervisor, hypervisorConfig *HypervisorConfig) error { if len(hypervisorConfig.Initdata) == 0 { @@ -825,8 +922,37 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { ctx, cancel := context.WithTimeout(ctx, bootTimeout*time.Second) defer cancel() - if err := clh.bootVM(ctx); err != nil { - return err + // Check if we should restore from template or create new VM + if clh.state.isRestoring { + // Copy template files to VM directory + snapshotDir := filepath.Dir(clh.config.MemoryPath) + + // Copy config.json from template to VM directory + srcConfig := filepath.Join(snapshotDir, "config.json") + dstConfig := filepath.Join(vmPath, "config.json") + if err := clh.copyFile(srcConfig, dstConfig); err != nil { + return fmt.Errorf("failed to copy config.json: %v", err) + } + + // Copy state.json from template to VM directory + srcState := filepath.Join(snapshotDir, "state.json") + dstState := filepath.Join(vmPath, "state.json") + if err := clh.copyFile(srcState, dstState); err != nil { + return fmt.Errorf("failed to copy state.json: %v", err) + } + + // Update vsock socket path in the copied config.json + if err := clh.updateVsockSocketPath(dstConfig, clh.id); err != nil { + return fmt.Errorf("failed to update vsock socket path: %v", err) + } + + if err := clh.restoreVM(ctx); err != nil { + return err + } + } else { + if err := clh.bootVM(ctx); err != nil { + return err + } } clh.state.state = clhReady @@ -1360,10 +1486,11 @@ func (clh *cloudHypervisor) SaveVM() error { ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) defer cancel() + snapshotDir := filepath.Dir(clh.config.MemoryPath) // Create snapshot config with file URL to template path // Use MemoryPath as base for snapshot destination // When creating a template, the MemoryPath is set to the template path, so we can use it to save the snapshot. 
- fileURL := "file://" + filepath.Dir(clh.config.MemoryPath) + fileURL := "file://" + snapshotDir vmSnapshotConfig := *chclient.NewVmSnapshotConfig() vmSnapshotConfig.SetDestinationUrl(fileURL) @@ -1892,12 +2019,13 @@ func (clh *cloudHypervisor) restoreVM(ctx context.Context) error { cl := clh.client() // use the VMStorePath as the base for the restore source URL - snapshotDir := clh.config.VMStorePath + vmPath := filepath.Join(clh.config.VMStorePath, clh.id) + sourceURL := "file://" + vmPath // check if the snapshot directory contains the state.json and config.json files // which contain the VM state and configuration respectively - stateFile := filepath.Join(snapshotDir, "state.json") - configFile := filepath.Join(snapshotDir, "config.json") + stateFile := filepath.Join(vmPath, "state.json") + configFile := filepath.Join(vmPath, "config.json") if _, err := os.Stat(stateFile); err != nil { return fmt.Errorf("Failed to access state file %s: %v", stateFile, err) @@ -1908,7 +2036,6 @@ func (clh *cloudHypervisor) restoreVM(ctx context.Context) error { } // Prepare restore configuration - sourceURL := "file://" + snapshotDir restoreConfig := *chclient.NewRestoreConfig(sourceURL) clh.Logger().WithField("sourceURL", sourceURL).Debug("Restore configuration") From 2dffe5f02fb97f8e00ac6afe6d2a478faec39518 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Wed, 13 May 2026 18:15:18 +0000 Subject: [PATCH 14/17] Use different VM storage paths in template_test.go Use different storage paths for different VMs created from the same template. Add comments to the test.
Signed-off-by: Harshit Gupta --- .../factory/template/template_test.go | 38 +++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/runtime/virtcontainers/factory/template/template_test.go b/src/runtime/virtcontainers/factory/template/template_test.go index 75d9733cc093..d22cb6e81bcd 100644 --- a/src/runtime/virtcontainers/factory/template/template_test.go +++ b/src/runtime/virtcontainers/factory/template/template_test.go @@ -57,15 +57,26 @@ func TestTemplateFactory(t *testing.T) { assert.NoError(err) defer hybridVSockTTRPCMock.Stop() - // New + // Create 2 sets of instance-specific directories for per-VM storage + runStorePath1 := t.TempDir() + vmStorePath1 := t.TempDir() + runStorePath2 := t.TempDir() + vmStorePath2 := t.TempDir() + + // Create a new Template Factory f, err := New(ctx, vmConfig, testDir) assert.Nil(err) // Config assert.Equal(f.Config(), vmConfig) - // GetBaseVM - vm, err := f.GetBaseVM(ctx, vmConfig) + // GetBaseVM with first instance paths + vmConfig1 := vmConfig + vmConfig1.HypervisorConfig.RunStorePath = runStorePath1 + vmConfig1.HypervisorConfig.VMStorePath = vmStorePath1 + + // Test the creation of a new VM from the template factory + vm, err := f.GetBaseVM(ctx, vmConfig1) assert.Nil(err) err = vm.Stop(ctx) @@ -79,6 +90,8 @@ func TestTemplateFactory(t *testing.T) { assert.Equal(tt.Config(), vmConfig) + // Checking that template VM check fails + // if the corresponding memory and state files are absent err = tt.checkTemplateVM() assert.Error(err) @@ -89,34 +102,45 @@ func TestTemplateFactory(t *testing.T) { _, err = os.Create(tt.deviceStatePath()) assert.Nil(err) + + // After creating state and memory files, checkTemplateVM should succeed err = tt.checkTemplateVM() assert.Nil(err) + // Recreate the template VM, which should succeed err = tt.createTemplateVM(ctx) assert.Nil(err) - vm, err = tt.GetBaseVM(ctx, vmConfig) + // Ensuring that directly calling template's GetBaseVM function + // returns a VM instance 
similar to the one returned by the factory's GetBaseVM function + vm, err = tt.GetBaseVM(ctx, vmConfig1) assert.Nil(err) err = vm.Stop(ctx) assert.Nil(err) - vm, err = f.GetBaseVM(ctx, vmConfig) + vm, err = f.GetBaseVM(ctx, vmConfig1) assert.Nil(err) err = vm.Stop(ctx) assert.Nil(err) + // Overwriting the template VM should succeed err = tt.createTemplateVM(ctx) assert.Nil(err) - vm, err = tt.GetBaseVM(ctx, vmConfig) + // Create second instance with different storage paths + vmConfig2 := vmConfig + vmConfig2.HypervisorConfig.RunStorePath = runStorePath2 + vmConfig2.HypervisorConfig.VMStorePath = vmStorePath2 + + vm, err = tt.GetBaseVM(ctx, vmConfig2) assert.Nil(err) err = vm.Stop(ctx) assert.Nil(err) - vm, err = f.GetBaseVM(ctx, vmConfig) + vm, err = f.GetBaseVM(ctx, vmConfig2) assert.Nil(err) err = vm.Stop(ctx) From 0233e8c6c8e29c9361a702fd9aff826e620424d8 Mon Sep 17 00:00:00 2001 From: Harshit Gupta Date: Thu, 14 May 2026 20:17:37 +0000 Subject: [PATCH 15/17] Add K8s E2E tests for template k8s-vm-templating-test.bats Implement the first draft of the VM Templating integration K8s tests. 
Signed-off-by: Harshit Gupta --- .../kubernetes/k8s-vm-templating-test.bats | 113 ++++++++++++++++++ .../kubernetes/run_kubernetes_tests.sh | 1 + 2 files changed, 114 insertions(+) create mode 100644 tests/integration/kubernetes/k8s-vm-templating-test.bats diff --git a/tests/integration/kubernetes/k8s-vm-templating-test.bats b/tests/integration/kubernetes/k8s-vm-templating-test.bats new file mode 100644 index 000000000000..f739e534e5ec --- /dev/null +++ b/tests/integration/kubernetes/k8s-vm-templating-test.bats @@ -0,0 +1,113 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2024 Kata Containers +# +# SPDX-License-Identifier: Apache-2.0 +# +# Tests for Kata VM templating (factory) functionality in Kubernetes integration mode + +load "${BATS_TEST_DIRNAME}/lib.sh" +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +get_shim_config_file() { + case "${KATA_HYPERVISOR}" in + *-runtime-rs) + echo "/opt/kata/share/defaults/kata-containers/runtime-rs/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml" + ;; + *) + echo "/opt/kata/share/defaults/kata-containers/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml" + ;; + esac +} + +# With setup_file and teardown_file being used, we use >&3 in some places to direct output to the terminal +# setup_file is used in BATS for one-time initialization for all tests in the file +setup_file() { + if [[ "${KATA_HYPERVISOR}" == *-runtime-rs ]]; then + export skip_vm_templating_tests=true + return 0 + fi + + setup_common || die "setup_common failed" + config_file="$(get_shim_config_file)" + backup_file="${config_file}.bats-vm-templating.bak" + + # Get ALL kata nodes + mapfile -t all_nodes < <(kubectl get nodes -l katacontainers.io/kata-runtime=true -o name | sed 's|^node/||') + [[ "${#all_nodes[@]}" -gt 0 ]] || die "No Kata nodes found" + + export all_nodes config_file backup_file + + # Configure and initialize VM templates on all Kata nodes + for n in "${all_nodes[@]}"; do 
+ echo "Configuring and initializing VM template on node: $n" >&3 + exec_host "$n" "sudo test -f '${backup_file}' || sudo cp '${config_file}' '${backup_file}'" || die "Failed to backup kata config on node $n" + exec_host "$n" "sudo sed -i -e 's|^#\\?enable_template[[:space:]]*=.*$|enable_template = true|g' -e 's|^#\\?template_path[[:space:]]*=.*$|template_path = \"/run/vc/vm/template\"|g' -e 's|^#\\?shared_fs[[:space:]]*=.*$|shared_fs = \"none\"|g' '${config_file}'" || die "Failed to update kata config on node $n" + exec_host "$n" "sudo grep -q '^enable_template[[:space:]]*=' '${config_file}' || echo 'enable_template = true' | sudo tee -a '${config_file}' >/dev/null" || die "Failed to set enable_template on node $n" + exec_host "$n" "sudo grep -q '^template_path[[:space:]]*=' '${config_file}' || echo 'template_path = \"/run/vc/vm/template\"' | sudo tee -a '${config_file}' >/dev/null" || die "Failed to set template_path on node $n" + exec_host "$n" "sudo grep -q '^shared_fs[[:space:]]*=' '${config_file}' || echo 'shared_fs = \"none\"' | sudo tee -a '${config_file}' >/dev/null" || die "Failed to set shared_fs on node $n" + exec_host "$n" "sudo kata-runtime factory init" || die "Failed to initialize VM template on node $n" + done + + echo "VM templates initialized on ${#all_nodes[@]} nodes" >&3 +} + +setup() { + if [[ "${skip_vm_templating_tests:-false}" == "true" ]]; then + skip "VM templating test is only supported for Go runtime" + fi + + # Select one node for this test + setup_common || die "setup_common failed" +} + +@test "VM template factory is initialized" { + # Verify factory state on each node + for n in "${all_nodes[@]}"; do + exec_host "$n" "test -d /run/vc/vm/template" || skip "VM template directory not found on $n" + done +} + +@test "Pod can be created with templated VM" { + pod_name="test-templated-pod" + ctr_name="test-container" + + pod_config=$(mktemp --tmpdir pod_config.XXXXXX.yaml) + cp "$pod_config_dir/busybox-template.yaml" "$pod_config" + + sed 
-i "s/POD_NAME/$pod_name/" "$pod_config" + sed -i "s/CTR_NAME/$ctr_name/" "$pod_config" + + # Create a simple pod to verify templating works + kubectl create -f "${pod_config}" + kubectl wait --for=condition=Ready --timeout=120s "pod/${pod_name}" || die "Pod failed to reach Ready state" + + # Verify the pod is running + kubectl get pod "${pod_name}" | grep Running || die "Pod is not in Running state" + + # Basic test: verify we can execute a command in the pod + kubectl exec "${pod_name}" -- sh -c "echo 'Hello from templated VM' && exit 0" +} + +teardown() { + # Clean up pod from previous test + kubectl delete pod "test-templated-pod" 2>/dev/null || true + + teardown_common "${node}" "${node_start_time:-}" +} + +teardown_file() { + if [[ "${skip_vm_templating_tests:-false}" == "true" ]]; then + return 0 + fi + + # Clean up VM templates on all Kata nodes + for n in "${all_nodes[@]}"; do + echo "Destroying VM template on node: $n" >&3 + exec_host "$n" "kata-runtime factory destroy" || echo "Warning: Failed to destroy VM template on node $n" >&3 + exec_host "$n" "if [ -f '${backup_file}' ]; then sudo mv '${backup_file}' '${config_file}'; fi" || echo "Warning: Failed to restore kata config on node $n" >&3 + done + + echo "VM templates destroyed on ${#all_nodes[@]} nodes" >&3 +} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index c2219cf8444a..45c92615737d 100755 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -94,6 +94,7 @@ else "k8s-security-context.bats" \ "k8s-shared-volume.bats" \ "k8s-volume.bats" \ + "k8s-vm-templating-test.bats" \ "k8s-nginx-connectivity.bats" \ ) From aeaf77a269dffb11c735ac57dc1cdeec208ac994 Mon Sep 17 00:00:00 2001 From: Cameron Baird Date: Mon, 8 Jun 2026 21:41:33 +0000 Subject: [PATCH 16/17] factory: do direct boot if no template VM is fetched --- src/runtime/pkg/katautils/create.go | 6 +----- 1 
file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index e7752bb0cb22..ad1535098cc0 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -81,12 +81,8 @@ func HandleFactory(ctx context.Context, vci vc.VC, runtimeConfig *oci.RuntimeCon kataUtilsLogger.WithField("factory", factoryConfig).Info("load vm factory") f, err := vf.NewFactory(ctx, factoryConfig, true) - if err != nil && !factoryConfig.VMCache { - kataUtilsLogger.WithError(err).Warn("load vm factory failed, about to create new one") - f, err = vf.NewFactory(ctx, factoryConfig, false) - } if err != nil { - kataUtilsLogger.WithError(err).Warn("create vm factory failed") + kataUtilsLogger.WithError(err).Warn("load vm factory failed, will use direct boot") return } From d0c37a17716ea318a17824b0ca7226fbaff62798 Mon Sep 17 00:00:00 2001 From: Cameron Baird Date: Wed, 10 Jun 2026 21:26:56 +0000 Subject: [PATCH 17/17] runtime: Refactor factory/hypervisor save/restore paths for clearer separation In the Cloud Hypervisor driver, SaveVM now takes the snapshot directory from the caller instead of deriving it from MemoryPath, restoring from a snapshot becomes a separate RestoreVM entry point instead of a flag checked in StartVM, and the snapshot memory-sharing rewrite moves out of SaveVM into PatchCLHSnapshotMemoryPrivate. The BootToBeTemplate, BootFromTemplate and MemoryPath checks in CreateVM are replaced by the FileBackedMemory config. --- src/runtime/virtcontainers/clh.go | 313 +++++++++--------- src/runtime/virtcontainers/clh_test.go | 9 +- .../virtcontainers/factory/factory_linux.go | 5 +- .../factory/template/template_linux.go | 38 ++- src/runtime/virtcontainers/fc.go | 8 +- src/runtime/virtcontainers/fc_test.go | 2 +- src/runtime/virtcontainers/hypervisor.go | 78 +++-- .../virtcontainers/hypervisor_config_linux.go | 4 - .../hypervisor_config_linux_test.go | 24 -- src/runtime/virtcontainers/mock_hypervisor.go | 6 +- .../virtcontainers/mock_hypervisor_test.go | 2 +- src/runtime/virtcontainers/persist.go | 8 - .../virtcontainers/persist/api/config.go | 14 - src/runtime/virtcontainers/qemu.go | 86 +++-- src/runtime/virtcontainers/qemu_amd64.go | 8 - src/runtime/virtcontainers/qemu_amd64_test.go | 8 - src/runtime/virtcontainers/qemu_test.go | 11 +- src/runtime/virtcontainers/remote.go | 6 +-
src/runtime/virtcontainers/stratovirt.go | 8 +- src/runtime/virtcontainers/virtframework.go | 6 +- src/runtime/virtcontainers/vm.go | 37 ++- src/runtime/virtcontainers/vm_test.go | 16 +- 22 files changed, 358 insertions(+), 339 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 87c7c6ce4433..9b3e682aac3a 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -281,14 +281,12 @@ type CloudHypervisorState struct { PID int VirtiofsDaemonPid int state clhState - isRestoring bool } func (s *CloudHypervisorState) reset() { s.PID = 0 s.VirtiofsDaemonPid = 0 s.state = clhNotReady - s.isRestoring = false } type cloudHypervisor struct { @@ -587,34 +585,32 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } } - // If the VM is booting from a template, or if the VM is going to be used as a template - // the memory is to be backed by a file, so we need to configure the memory zones accordingly. - if clh.config.BootFromTemplate || clh.config.BootToBeTemplate { - // Double-check that the clh.config.MemoryPath file exists before using it in the VM config, to avoid hitting a less clear error from cloud hypervisor when it tries to access the non-existing memory file. - if _, err := os.Stat(clh.config.MemoryPath); os.IsNotExist(err) { - return fmt.Errorf("memory file %s does not exist", clh.config.MemoryPath) + // If the guest memory is backed by a host file (e.g. for VM templating, + // where a template source uses shared memory and a clone uses private + // Copy-On-Write memory), configure the memory zones accordingly. + if clh.config.FileBackedMemory != nil { + memPath := clh.config.FileBackedMemory.Path + // Double-check that the memory file exists before using it in the VM config, to avoid hitting a less clear error from cloud hypervisor when it tries to access the non-existing memory file. 
+ if _, err := os.Stat(memPath); os.IsNotExist(err) { + return fmt.Errorf("memory file %s does not exist", memPath) } + shared := clh.config.FileBackedMemory.Shared + // Set the size to be 0 since we are going to configure actual size via zones clh.vmconfig.Memory = chclient.NewMemoryConfig(0) memoryZoneConfig := chclient.NewMemoryZoneConfig("mem0", int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) - if clh.config.BootToBeTemplate { - // When BootToBeTemplate is true, the memory file backing the VM memory is shared between multiple VMs created from the same template. - // So we need to set shared to true in this case. - memoryZoneConfig.SetShared(true) - clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) - } else { - // When BootFromTemplate is true, set shared=false to ensure Copy-On-Write is used for the memory file. - // So that the VM can have its own private memory. - memoryZoneConfig.SetShared(false) - clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(false) - } - memoryZoneConfig.SetFile(clh.config.MemoryPath) + // shared=true maps the backing file MAP_SHARED (a template source + // whose memory is shared between clones); shared=false maps it + // MAP_PRIVATE so the VM gets its own Copy-On-Write memory. + memoryZoneConfig.SetShared(shared) + clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(shared) + memoryZoneConfig.SetFile(memPath) clh.vmconfig.Memory.Zones = &[]chclient.MemoryZoneConfig{ *memoryZoneConfig, } - } else { // Normal (non-template) VM creation + } else { // Normal (non file-backed) VM creation // Create the VM memory config via the constructor to ensure default values are properly assigned clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) // Memory config shared is to be enabled when using vhost_user backends, ex. 
virtio-fs @@ -756,45 +752,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net return err } - // Check if we should restore from template instead of creating new VM - if clh.config.BootFromTemplate && clh.shouldRestoreFromTemplate() { - clh.Logger().Info("Template files found, will restore VM instead of creating new") - // Mark this as a restore operation for StartVM to use RestoreVM instead - clh.state.isRestoring = true - return nil - } - return nil } -// shouldRestoreFromTemplate checks if template snapshot files exist and we should restore instead of creating new VM -func (clh *cloudHypervisor) shouldRestoreFromTemplate() bool { - // For template restore, we need the snapshot directory to contain the necessary files - // The snapshotDir is derived from the MemoryPath directory - snapshotDir := filepath.Dir(clh.config.MemoryPath) - - // Check for required template files (config.json and memory file) - configFile := filepath.Join(snapshotDir, "config.json") - memoryFile := clh.config.MemoryPath - - if _, err := os.Stat(configFile); os.IsNotExist(err) { - clh.Logger().WithField("configFile", configFile).Debug("Template config file not found") - return false - } - - if _, err := os.Stat(memoryFile); os.IsNotExist(err) { - clh.Logger().WithField("memoryFile", memoryFile).Debug("Template memory file not found") - return false - } - - clh.Logger().WithFields(log.Fields{ - "configFile": configFile, - "memoryFile": memoryFile, - }).Info("Template files found, can restore VM from template") - - return true -} - // copyFile copies a file from src to dst func (clh *cloudHypervisor) copyFile(src, dst string) error { srcFile, err := os.Open(src) @@ -876,86 +836,128 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { clh.Logger().WithField("function", "StartVM").Info("starting Sandbox") + if err := clh.launchAndInit(ctx); err != nil { + return err + } + + ctx, cancel := clh.bootTimeoutContext(ctx) + defer cancel() 
+ + if err := clh.bootVM(ctx); err != nil { + return err + } + + clh.state.state = clhReady + return nil +} + +// RestoreVM brings up the VMM and restores the virtual machine from a snapshot +// previously written to snapshotDir. The restored VM is left in a paused state; +// the caller is responsible for resuming it and performing any post-restore +// housekeeping (e.g. reseeding the RNG, syncing the guest clock). +func (clh *cloudHypervisor) RestoreVM(ctx context.Context, snapshotDir string) error { + span, ctx := katatrace.Trace(ctx, clh.Logger(), "RestoreVM", clhTracingTags, map[string]string{"sandbox_id": clh.id}) + defer span.End() + + clh.Logger().WithField("function", "RestoreVM").Info("restoring Sandbox") + + if err := clh.launchAndInit(ctx); err != nil { + return err + } + + if err := clh.prepareRestoreFiles(snapshotDir); err != nil { + return err + } + + ctx, cancel := clh.bootTimeoutContext(ctx) + defer cancel() + + if err := clh.restoreVM(ctx); err != nil { + return err + } + + clh.state.state = clhReady + return nil +} + +// launchAndInit performs the work common to StartVM and RestoreVM: it creates +// the VM runtime directory and launches the virtiofs daemon and the +// cloud-hypervisor process. On return the VMM is running and ready to accept +// API calls; the caller then boots or restores the VM under its own boot +// timeout (see bootTimeoutContext). +func (clh *cloudHypervisor) launchAndInit(ctx context.Context) error { vmPath := filepath.Join(clh.config.VMStorePath, clh.id) - err := utils.MkdirAllWithInheritedOwner(vmPath, DirMode) - if err != nil { + if err := utils.MkdirAllWithInheritedOwner(vmPath, DirMode); err != nil { return err } // This needs to be done as late as possible, just before launching - // virtiofsd are executed by kata-runtime after this call, run with - // the SELinux label. If these processes require privileged, we do - // notwant to run them under confinement. 
+ // virtiofsd and cloud-hypervisor, since those processes are executed by + // kata-runtime with the SELinux label. If these processes require + // privileged, we do not want to run them under confinement. The label only + // needs to be active while they are spawned, so it is reset before + // returning. if !clh.config.DisableSeLinux { - if err := selinux.SetExecLabel(clh.config.SELinuxProcessLabel); err != nil { return err } defer selinux.SetExecLabel("") } - err = clh.setupVirtiofsDaemon(ctx) - if err != nil { + if err := clh.setupVirtiofsDaemon(ctx); err != nil { return err } - defer func() { - if err == nil { - return - } + if err := clh.launchClh(); err != nil { if clh.config.SharedFS == config.VirtioFS || clh.config.SharedFS == config.VirtioFSNydus { if shutdownErr := clh.stopVirtiofsDaemon(ctx); shutdownErr != nil { clh.Logger().WithError(shutdownErr).Warn("error shutting down VirtiofsDaemon") } } - }() - - err = clh.launchClh() - if err != nil { return fmt.Errorf("failed to launch cloud-hypervisor: %q", err) } + return nil +} + +// bootTimeoutContext derives a context from ctx carrying the minimum timeout +// for the CreateVM+BootVM (or restore) API sequence, which can take longer than +// a regular API call. The caller is responsible for calling the returned cancel +// func. 
+func (clh *cloudHypervisor) bootTimeoutContext(ctx context.Context) (context.Context, context.CancelFunc) { bootTimeout := clh.getClhAPITimeout() if bootTimeout < clhCreateAndBootVMMinimumTimeout { bootTimeout = clhCreateAndBootVMMinimumTimeout } - ctx, cancel := context.WithTimeout(ctx, bootTimeout*time.Second) - defer cancel() - - // Check if we should restore from template or create new VM - if clh.state.isRestoring { - // Copy template files to VM directory - snapshotDir := filepath.Dir(clh.config.MemoryPath) + return context.WithTimeout(ctx, bootTimeout*time.Second) +} - // Copy config.json from template to VM directory - srcConfig := filepath.Join(snapshotDir, "config.json") - dstConfig := filepath.Join(vmPath, "config.json") - if err := clh.copyFile(srcConfig, dstConfig); err != nil { - return fmt.Errorf("failed to copy config.json: %v", err) - } +// prepareRestoreFiles copies the snapshot's config.json and state.json from +// snapshotDir into the VM's runtime directory and patches the vsock socket path +// in the copied config so the restored VM uses a unique, per-VM socket. 
+func (clh *cloudHypervisor) prepareRestoreFiles(snapshotDir string) error { + vmPath := filepath.Join(clh.config.VMStorePath, clh.id) - // Copy state.json from template to VM directory - srcState := filepath.Join(snapshotDir, "state.json") - dstState := filepath.Join(vmPath, "state.json") - if err := clh.copyFile(srcState, dstState); err != nil { - return fmt.Errorf("failed to copy state.json: %v", err) - } + // Copy config.json from the snapshot to the VM directory + srcConfig := filepath.Join(snapshotDir, "config.json") + dstConfig := filepath.Join(vmPath, "config.json") + if err := clh.copyFile(srcConfig, dstConfig); err != nil { + return fmt.Errorf("failed to copy config.json: %v", err) + } - // Update vsock socket path in the copied config.json - if err := clh.updateVsockSocketPath(dstConfig, clh.id); err != nil { - return fmt.Errorf("failed to update vsock socket path: %v", err) - } + // Copy state.json from the snapshot to the VM directory + srcState := filepath.Join(snapshotDir, "state.json") + dstState := filepath.Join(vmPath, "state.json") + if err := clh.copyFile(srcState, dstState); err != nil { + return fmt.Errorf("failed to copy state.json: %v", err) + } - if err := clh.restoreVM(ctx); err != nil { - return err - } - } else { - if err := clh.bootVM(ctx); err != nil { - return err - } + // Update vsock socket path in the copied config.json so the restored VM + // uses a unique, per-VM socket instead of the snapshot's original one. 
+ if err := clh.updateVsockSocketPath(dstConfig, clh.id); err != nil { + return fmt.Errorf("failed to update vsock socket path: %v", err) } - clh.state.state = clhReady return nil } @@ -1479,17 +1481,17 @@ func (clh *cloudHypervisor) PauseVM(ctx context.Context) error { return nil } -func (clh *cloudHypervisor) SaveVM() error { +func (clh *cloudHypervisor) SaveVM(snapshotDir string) error { clh.Logger().WithField("function", "SaveVM").Info("Save Sandbox") cl := clh.client() ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) defer cancel() - snapshotDir := filepath.Dir(clh.config.MemoryPath) - // Create snapshot config with file URL to template path - // Use MemoryPath as base for snapshot destination - // When creating a template, the MemoryPath is set to the template path, so we can use it to save the snapshot. + // Snapshot the VM into the caller-provided directory. This is a pure + // hypervisor operation: the caller chooses the destination and is + // responsible for any feature-specific post-processing (e.g. a template + // factory adjusting the snapshot's memory sharing mode). 
fileURL := "file://" + snapshotDir vmSnapshotConfig := *chclient.NewVmSnapshotConfig() @@ -1501,54 +1503,59 @@ func (clh *cloudHypervisor) SaveVM() error { return openAPIClientError(err) } - if clh.config.BootToBeTemplate { - // Update the config.json file in the snapshotDir to set memory shared=false - snapshotConfigPath := filepath.Join(snapshotDir, "config.json") - snapshotConfig, err := os.ReadFile(snapshotConfigPath) - if err != nil { - clh.Logger().WithError(err).Error("Failed to read snapshot config") - return err - } + return nil +} - var snapshotConfigData map[string]interface{} - if err := json.Unmarshal(snapshotConfig, &snapshotConfigData); err != nil { - clh.Logger().WithError(err).Error("Failed to unmarshal snapshot config") - return err - } +// PatchCLHSnapshotMemoryPrivate rewrites the memory configuration in a Cloud +// Hypervisor snapshot's config.json so that the memory (and every memory zone) +// is marked shared=false, i.e. mapped MAP_PRIVATE / Copy-On-Write when the +// snapshot is later restored. +// +// It encapsulates knowledge of the CLH snapshot on-disk format and is a pure +// snapshot-directory operation that does not depend on a running hypervisor. +// Callers decide *when* to apply it: for example, the VM template factory marks +// a template's snapshot private so that clones restored from it get their own +// Copy-On-Write memory while still sharing the template's backing file. 
+func PatchCLHSnapshotMemoryPrivate(snapshotDir string) error { + configPath := filepath.Join(snapshotDir, "config.json") + + data, err := os.ReadFile(configPath) + if err != nil { + return fmt.Errorf("failed to read snapshot config %s: %w", configPath, err) + } - // Access the memory section and cast it to a map - if memorySection, ok := snapshotConfigData["memory"].(map[string]interface{}); ok { - memorySection["shared"] = false - // Do the same update for each element fo the "zones" array in the memorySection - if zones, ok := memorySection["zones"].([]interface{}); ok { - for _, zone := range zones { - if zoneMap, ok := zone.(map[string]interface{}); ok { - zoneMap["shared"] = false - } else { - clh.Logger().Error("Unable to access zone in snapshot config memory section") - return fmt.Errorf("invalid snapshot config structure: zone in memory section not found or invalid") - } - } - } else { - clh.Logger().Error("Unable to access zones array in snapshot config memory section") - return fmt.Errorf("invalid snapshot config structure: zones array in memory section not found or invalid") - } - } else { - clh.Logger().Error("Unable to access memory section in snapshot config") - return fmt.Errorf("invalid snapshot config structure: memory section not found or invalid") - } + var snapshotConfig map[string]interface{} + dec := json.NewDecoder(bytes.NewReader(data)) + dec.UseNumber() + if err := dec.Decode(&snapshotConfig); err != nil { + return fmt.Errorf("failed to unmarshal snapshot config: %w", err) + } - // Write the modified config back to file - modifiedConfig, err := json.Marshal(snapshotConfigData) - if err != nil { - clh.Logger().WithError(err).Error("Failed to marshal modified snapshot config") - return err - } + memorySection, ok := snapshotConfig["memory"].(map[string]interface{}) + if !ok { + return fmt.Errorf("invalid snapshot config structure: memory section not found or invalid") + } + memorySection["shared"] = false - if err := 
os.WriteFile(snapshotConfigPath, modifiedConfig, 0644); err != nil { - clh.Logger().WithError(err).Error("Failed to write modified snapshot config") - return err + zones, ok := memorySection["zones"].([]interface{}) + if !ok { + return fmt.Errorf("invalid snapshot config structure: zones array in memory section not found or invalid") + } + for _, zone := range zones { + zoneMap, ok := zone.(map[string]interface{}) + if !ok { + return fmt.Errorf("invalid snapshot config structure: zone in memory section not found or invalid") } + zoneMap["shared"] = false + } + + modifiedConfig, err := json.Marshal(snapshotConfig) + if err != nil { + return fmt.Errorf("failed to marshal modified snapshot config: %w", err) + } + + if err := os.WriteFile(configPath, modifiedConfig, 0644); err != nil { + return fmt.Errorf("failed to write modified snapshot config: %w", err) } return nil @@ -2014,7 +2021,7 @@ func (clh *cloudHypervisor) bootVM(ctx context.Context) error { } func (clh *cloudHypervisor) restoreVM(ctx context.Context) error { - clh.Logger().Info("Restoring VM from template") + clh.Logger().Info("Restoring VM") cl := clh.client() diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 46f3ff0e9f07..7cc7652bcfbd 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -596,8 +596,6 @@ func TestClhSaveVM(t *testing.T) { clhConfig, err := newClhConfig() assert.NoError(err) - // For testing, assume the memory path is located within the VM store path. 
- clhConfig.MemoryPath = filepath.Join(store.RunVMStoragePath(), "memory") clhConfig.VMStorePath = store.RunVMStoragePath() clhConfig.RunStorePath = store.RunStoragePath() @@ -607,11 +605,12 @@ func TestClhSaveVM(t *testing.T) { APIClient: mockClient, } - err = clh.SaveVM() + snapshotDir := store.RunVMStoragePath() + err = clh.SaveVM(snapshotDir) assert.NoError(err) if assert.NotNil(mockClient.snapshotRequest) { - expectedDestinationURL := "file://" + filepath.Dir(clhConfig.MemoryPath) + expectedDestinationURL := "file://" + snapshotDir assert.Equal(expectedDestinationURL, mockClient.snapshotRequest.GetDestinationUrl()) } } @@ -670,7 +669,7 @@ func TestCloudHypervisorStartSandbox(t *testing.T) { err = clh.PauseVM(context.Background()) assert.NoError(err) - err = clh.SaveVM() + err = clh.SaveVM(clhConfig.VMStorePath) assert.NoError(err) err = clh.ResumeVM(context.Background()) diff --git a/src/runtime/virtcontainers/factory/factory_linux.go b/src/runtime/virtcontainers/factory/factory_linux.go index c010916943f8..fe8c7c58349a 100644 --- a/src/runtime/virtcontainers/factory/factory_linux.go +++ b/src/runtime/virtcontainers/factory/factory_linux.go @@ -73,10 +73,7 @@ func NewFactory(ctx context.Context, config Config, fetchOnly bool) (vc.Factory, func resetHypervisorConfig(config *vc.VMConfig) { config.HypervisorConfig.NumVCPUsF = 0 config.HypervisorConfig.MemorySize = 0 - config.HypervisorConfig.BootToBeTemplate = false - config.HypervisorConfig.BootFromTemplate = false - config.HypervisorConfig.MemoryPath = "" - config.HypervisorConfig.DevicesStatePath = "" + config.HypervisorConfig.FileBackedMemory = nil config.HypervisorConfig.SharedPath = "" config.HypervisorConfig.VMStorePath = "" config.HypervisorConfig.RunStorePath = "" diff --git a/src/runtime/virtcontainers/factory/template/template_linux.go b/src/runtime/virtcontainers/factory/template/template_linux.go index 2a23cdbccd43..81b1877a5f61 100644 --- 
a/src/runtime/virtcontainers/factory/template/template_linux.go +++ b/src/runtime/virtcontainers/factory/template/template_linux.go @@ -131,10 +131,13 @@ func (t *template) prepareTemplateFiles() error { func (t *template) createTemplateVM(ctx context.Context) error { // create the template vm config := t.config - config.HypervisorConfig.BootToBeTemplate = true - config.HypervisorConfig.BootFromTemplate = false - config.HypervisorConfig.MemoryPath = t.statePath + "/memory" - config.HypervisorConfig.DevicesStatePath = t.deviceStatePath() + // The template source VM is backed by a shared memory file so that clones + // can map the same file. The factory expresses this through the generic + // file-backed memory config rather than template-specific flags. + config.HypervisorConfig.FileBackedMemory = &vc.FileBackedMemoryConfig{ + Path: t.statePath + "/memory", + Shared: true, + } config.HypervisorConfig.VMStorePath = t.statePath vm, err := vc.NewVM(ctx, config) @@ -160,24 +163,39 @@ func (t *template) createTemplateVM(ctx context.Context) error { return err } - if err = vm.Save(); err != nil { + if err = vm.Save(t.statePath); err != nil { return err } + // The template source VM runs with shared memory so that clones can map + // the same backing file, but the snapshot must record the memory as + // private so that clones restored from it get Copy-On-Write memory. The + // factory owns this policy decision (when to make a snapshot private), + // while the CLH snapshot-format details live in + // vc.PatchCLHSnapshotMemoryPrivate. Only Cloud Hypervisor records a + // config.json that needs patching; QEMU's device-state file does not. 
+ if t.config.HypervisorType == vc.ClhHypervisor { + if err = vc.PatchCLHSnapshotMemoryPrivate(t.statePath); err != nil { + return err + } + } + return nil } func (t *template) createFromTemplateVM(ctx context.Context, c vc.VMConfig) (*vc.VM, error) { config := t.config - config.HypervisorConfig.BootToBeTemplate = false - config.HypervisorConfig.BootFromTemplate = true - config.HypervisorConfig.MemoryPath = t.statePath + "/memory" - config.HypervisorConfig.DevicesStatePath = t.deviceStatePath() + // Clones restored from the template use private Copy-On-Write memory + // backed by the template's shared memory file. + config.HypervisorConfig.FileBackedMemory = &vc.FileBackedMemoryConfig{ + Path: t.statePath + "/memory", + Shared: false, + } config.HypervisorConfig.SharedPath = c.HypervisorConfig.SharedPath config.HypervisorConfig.VMStorePath = c.HypervisorConfig.VMStorePath config.HypervisorConfig.RunStorePath = c.HypervisorConfig.RunStorePath - return vc.NewVM(ctx, config) + return vc.NewVMFromSnapshot(ctx, config, t.statePath) } func (t *template) checkTemplateVM() error { diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 339517693573..0f43f131efa6 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -901,7 +901,13 @@ func (fc *firecracker) PauseVM(ctx context.Context) error { return nil } -func (fc *firecracker) SaveVM() error { +func (fc *firecracker) SaveVM(snapshotDir string) error { + // Firecracker does not support snapshot/restore in this implementation. + return nil +} + +func (fc *firecracker) RestoreVM(ctx context.Context, snapshotDir string) error { + // Firecracker does not support snapshot/restore in this implementation. 
return nil } diff --git a/src/runtime/virtcontainers/fc_test.go b/src/runtime/virtcontainers/fc_test.go index 5550b68958ae..f74ed3c2d499 100644 --- a/src/runtime/virtcontainers/fc_test.go +++ b/src/runtime/virtcontainers/fc_test.go @@ -122,7 +122,7 @@ func TestFCSaveVM(t *testing.T) { assert := assert.New(t) fc := firecracker{} - err := fc.SaveVM() + err := fc.SaveVM(t.TempDir()) assert.NoError(err) } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 5eb95b74412c..fbcd05c81db0 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -462,6 +462,22 @@ type Param struct { Value string } +// FileBackedMemoryConfig describes guest memory that is backed by a host file. +// It is a generic primitive used by features such as VM templating, live +// migration and checkpoint/restore. The hypervisor layer only needs to know +// where the backing file lives and whether it should be mapped shared or +// private; it does not need to know which higher-level feature requested it. +type FileBackedMemoryConfig struct { + // Path is the host path of the memory backing file. + Path string + + // Shared selects the mapping mode for the backing file: MAP_SHARED when + // true (e.g. a template source whose memory is shared with clones) and + // MAP_PRIVATE/Copy-On-Write when false (e.g. a clone restored from a + // template or a migration target that needs its own private memory). + Shared bool +} + // HypervisorConfig is the hypervisor configuration. // nolint: govet type HypervisorConfig struct { @@ -513,13 +529,12 @@ type HypervisorConfig struct { // emulated. HypervisorMachineType string - // MemoryPath is the memory file path of VM memory. Used when either BootToBeTemplate or - // BootFromTemplate is true. - MemoryPath string - - // DevicesStatePath is the VM device state file path. Used when either BootToBeTemplate or - // BootFromTemplate is true. 
- DevicesStatePath string + // FileBackedMemory describes file-backed guest memory. When non-nil the + // guest memory is backed by the file at Path and mapped either MAP_SHARED + // (Shared=true, e.g. a template source) or MAP_PRIVATE/Copy-On-Write + // (Shared=false, e.g. a clone restored from a template or a migration + // target). When nil the guest uses standard anonymous memory. + FileBackedMemory *FileBackedMemoryConfig // EntropySource is the path to a host source of // entropy (/dev/random, /dev/urandom or real hardware RNG device) @@ -560,10 +575,20 @@ type HypervisorConfig struct { // VMid is "" if the hypervisor is not created by the factory. VMid string - // VMStorePath is the location on disk where VM information will persist + // VMStorePath is the root directory (typically /run/vc/vm, supplied by the + // persist driver's RunVMStoragePath()) under which each VM's VMM runtime + // artifacts live at /: the hypervisor's control sockets + // (e.g. Cloud Hypervisor's API and hybrid-vsock sockets, QEMU's QMP and + // console sockets), QEMU's pid file, and the config.json/state.json files + // copied in when restoring a VM from a snapshot. VMStorePath string - // VMStorePath is the location on disk where runtime information will persist + // RunStorePath is the root directory (typically /run/vc/sbs, supplied by + // the persist driver's RunStoragePath()) under which each sandbox's + // persisted state lives at /: the sandbox and container + // state written by the persist driver. (A guest memory dump reads this + // state and copies it into the dump, but the dump itself — vmcore and + // hypervisor metadata — is written under GuestMemoryDumpPath, not here.) 
RunStorePath string // SELinux label for the VM @@ -811,12 +836,6 @@ type HypervisorConfig struct { // Enable SEV-SNP guests on AMD machines capable of both SevSnpGuest bool - // BootToBeTemplate used to indicate if the VM is created to be a template VM - BootToBeTemplate bool - - // BootFromTemplate used to indicate if the VM should be created from a template VM - BootFromTemplate bool - // DisableVhostNet is used to indicate if host supports vhost_net DisableVhostNet bool @@ -874,24 +893,6 @@ type VcpuThreadIDs struct { vcpus map[int]int } -func (conf *HypervisorConfig) CheckTemplateConfig() error { - if conf.BootToBeTemplate && conf.BootFromTemplate { - return fmt.Errorf("Cannot set both 'to be' and 'from' vm tempate") - } - - if conf.BootToBeTemplate || conf.BootFromTemplate { - if conf.MemoryPath == "" { - return fmt.Errorf("Missing MemoryPath for vm template") - } - - if conf.BootFromTemplate && conf.DevicesStatePath == "" { - return fmt.Errorf("Missing DevicesStatePath to Load from vm template") - } - } - - return nil -} - // AddKernelParam allows the addition of new kernel parameters to an existing // hypervisor configuration. func (conf *HypervisorConfig) AddKernelParam(p Param) error { @@ -1291,7 +1292,16 @@ type Hypervisor interface { // just perform cleanup. StopVM(ctx context.Context, waitOnly bool) error PauseVM(ctx context.Context) error - SaveVM() error + // SaveVM snapshots the running VM into snapshotDir. It is a pure + // hypervisor operation: the caller chooses the destination and is + // responsible for any feature-specific post-processing (e.g. template + // memory-sharing adjustments). + SaveVM(snapshotDir string) error + // RestoreVM brings up a VM by restoring it from a snapshot previously + // written to snapshotDir. The restored VM is left in a paused state; the + // caller decides when to ResumeVM and what post-restore housekeeping to + // perform (e.g. reseeding the RNG, syncing the guest clock). 
+ RestoreVM(ctx context.Context, snapshotDir string) error ResumeVM(ctx context.Context) error AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) diff --git a/src/runtime/virtcontainers/hypervisor_config_linux.go b/src/runtime/virtcontainers/hypervisor_config_linux.go index 9d4c8c1c6fd9..4cdc3c53f0c8 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux.go @@ -32,10 +32,6 @@ func validateHypervisorConfig(conf *HypervisorConfig) error { return fmt.Errorf("Image and initrd path cannot be both set") } - if err := conf.CheckTemplateConfig(); err != nil { - return err - } - if conf.NumVCPUsF == 0 { conf.NumVCPUsF = defaultVCPUs } diff --git a/src/runtime/virtcontainers/hypervisor_config_linux_test.go b/src/runtime/virtcontainers/hypervisor_config_linux_test.go index 6be2cb3dec4b..a84bd8945851 100644 --- a/src/runtime/virtcontainers/hypervisor_config_linux_test.go +++ b/src/runtime/virtcontainers/hypervisor_config_linux_test.go @@ -65,30 +65,6 @@ func TestHypervisorConfigSecureExecution(t *testing.T) { testHypervisorConfigValid(t, hypervisorConfig, false) } -func TestHypervisorConfigValidTemplateConfig(t *testing.T) { - hypervisorConfig := &HypervisorConfig{ - KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel), - ImagePath: fmt.Sprintf("%s/%s", testDir, testImage), - HypervisorPath: fmt.Sprintf("%s/%s", testDir, testHypervisor), - BootToBeTemplate: true, - BootFromTemplate: true, - } - testHypervisorConfigValid(t, hypervisorConfig, false) - - hypervisorConfig.BootToBeTemplate = false - testHypervisorConfigValid(t, hypervisorConfig, false) - hypervisorConfig.MemoryPath = "foobar" - testHypervisorConfigValid(t, hypervisorConfig, false) - hypervisorConfig.DevicesStatePath = "foobar" - testHypervisorConfigValid(t, hypervisorConfig, true) - - hypervisorConfig.BootFromTemplate = 
false - hypervisorConfig.BootToBeTemplate = true - testHypervisorConfigValid(t, hypervisorConfig, true) - hypervisorConfig.MemoryPath = "" - testHypervisorConfigValid(t, hypervisorConfig, false) -} - func TestHypervisorConfigDefaults(t *testing.T) { assert := assert.New(t) hypervisorConfig := &HypervisorConfig{ diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index 7d6da561faa3..8b3ad7120bcc 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ b/src/runtime/virtcontainers/mock_hypervisor.go @@ -60,7 +60,11 @@ func (m *mockHypervisor) ResumeVM(ctx context.Context) error { return nil } -func (m *mockHypervisor) SaveVM() error { +func (m *mockHypervisor) SaveVM(snapshotDir string) error { + return nil +} + +func (m *mockHypervisor) RestoreVM(ctx context.Context, snapshotDir string) error { return nil } diff --git a/src/runtime/virtcontainers/mock_hypervisor_test.go b/src/runtime/virtcontainers/mock_hypervisor_test.go index ba4435f13b51..b7ae8e21251b 100644 --- a/src/runtime/virtcontainers/mock_hypervisor_test.go +++ b/src/runtime/virtcontainers/mock_hypervisor_test.go @@ -69,7 +69,7 @@ func TestMockHypervisorGetSandboxConsole(t *testing.T) { func TestMockHypervisorSaveSandbox(t *testing.T) { var m *mockHypervisor - assert.NoError(t, m.SaveVM()) + assert.NoError(t, m.SaveVM(t.TempDir())) } func TestMockHypervisorDisconnect(t *testing.T) { diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index a3cb0bc85996..66b68337edc0 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -221,8 +221,6 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { JailerPathList: sconfig.HypervisorConfig.JailerPathList, BlockDeviceDriver: sconfig.HypervisorConfig.BlockDeviceDriver, HypervisorMachineType: sconfig.HypervisorConfig.HypervisorMachineType, - MemoryPath: sconfig.HypervisorConfig.MemoryPath, - DevicesStatePath: 
sconfig.HypervisorConfig.DevicesStatePath, EntropySource: sconfig.HypervisorConfig.EntropySource, EntropySourceList: sconfig.HypervisorConfig.EntropySourceList, SharedFS: sconfig.HypervisorConfig.SharedFS, @@ -243,8 +241,6 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList, DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks, DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm, - BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate, - BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate, DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet, EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore, SeccompSandbox: sconfig.HypervisorConfig.SeccompSandbox, @@ -460,8 +456,6 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { JailerPathList: hconf.JailerPathList, BlockDeviceDriver: hconf.BlockDeviceDriver, HypervisorMachineType: hconf.HypervisorMachineType, - MemoryPath: hconf.MemoryPath, - DevicesStatePath: hconf.DevicesStatePath, EntropySource: hconf.EntropySource, EntropySourceList: hconf.EntropySourceList, SharedFS: hconf.SharedFS, @@ -486,8 +480,6 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { ColdPlugVFIO: hconf.ColdPlugVFIO, PCIeRootPort: hconf.PCIeRootPort, PCIeSwitchPort: hconf.PCIeSwitchPort, - BootToBeTemplate: hconf.BootToBeTemplate, - BootFromTemplate: hconf.BootFromTemplate, DisableVhostNet: hconf.DisableVhostNet, EnableVhostUserStore: hconf.EnableVhostUserStore, VhostUserStorePath: hconf.VhostUserStorePath, diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 82d1cceb9d8c..e91d21eee9c7 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -47,14 +47,6 @@ type HypervisorConfig struct { // emulated. 
HypervisorMachineType string - // MemoryPath is the memory file path of VM memory. Used when either BootToBeTemplate or - // BootFromTemplate is true. - MemoryPath string - - // DevicesStatePath is the VM device state file path. Used when either BootToBeTemplate or - // BootFromTemplate is true. - DevicesStatePath string - // EntropySource is the path to a host source of // entropy (/dev/random, /dev/urandom or real hardware RNG device) EntropySource string @@ -202,12 +194,6 @@ type HypervisorConfig struct { // PCIeSwitchPort is the number of ports needed in the hypvervisor PCIeSwitchPort uint32 - // BootToBeTemplate used to indicate if the VM is created to be a template VM - BootToBeTemplate bool - - // BootFromTemplate used to indicate if the VM should be created from a template VM - BootFromTemplate bool - // DisableVhostNet is used to indicate if host supports vhost_net DisableVhostNet bool diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 1402fab2fefc..19c8b632bb13 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -134,6 +134,11 @@ const ( qmpCapErrMsg = "Failed to negotiate QMP Capabilities" qmpExecCatCmd = "exec:cat" + // qemuDeviceStateFile is the file name, within a snapshot directory, that + // SaveVM writes the migrated VM device state to and RestoreVM reads it back + // from. 
+ qemuDeviceStateFile = "state" + scsiControllerID = "scsi0" rngID = "rng0" fallbackFileBackedMemDir = "/dev/shm" @@ -468,25 +473,6 @@ func (q *qemu) buildDevices(ctx context.Context, kernelPath string) ([]govmmQemu return devices, ioThread, kernel, nil } -func (q *qemu) setupTemplate(knobs *govmmQemu.Knobs, memory *govmmQemu.Memory) govmmQemu.Incoming { - incoming := govmmQemu.Incoming{} - - if q.config.BootToBeTemplate || q.config.BootFromTemplate { - knobs.FileBackedMem = true - memory.Path = q.config.MemoryPath - - if q.config.BootToBeTemplate { - knobs.MemShared = true - } - - if q.config.BootFromTemplate { - incoming.MigrationType = govmmQemu.MigrationDefer - } - } - - return incoming -} - func (q *qemu) setupFileBackedMem(knobs *govmmQemu.Knobs, memory *govmmQemu.Memory) { var target string if q.config.FileBackedMemRootDir != "" { @@ -612,7 +598,17 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi IOMMUPlatform: q.config.IOMMUPlatform, } - incoming := q.setupTemplate(&knobs, &memory) + // Configure file-backed guest memory when requested (e.g. for VM + // templating). The backing path and sharing mode come straight from the + // generic config; the hypervisor does not need to know which feature + // requested it. The incoming-migration setup needed to restore a clone + // from a snapshot is handled separately by RestoreVM, not here. + incoming := govmmQemu.Incoming{} + if q.config.FileBackedMemory != nil { + knobs.FileBackedMem = true + memory.Path = q.config.FileBackedMemory.Path + knobs.MemShared = q.config.FileBackedMemory.Shared + } // With the current implementations, VM templating will not work with file // based memory (stand-alone) or virtiofs. This is because VM templating @@ -621,7 +617,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi // memory. 
if q.config.SharedFS == config.VirtioFS || q.config.SharedFS == config.VirtioFSNydus || q.config.FileBackedMemRootDir != "" { - if !(q.config.BootToBeTemplate || q.config.BootFromTemplate) { + if q.config.FileBackedMemory == nil { q.setupFileBackedMem(&knobs, &memory) } else { return errors.New("VM templating has been enabled with either virtio-fs or file backed memory and this configuration will not work") @@ -1191,12 +1187,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { return err } - if q.config.BootFromTemplate { - if err = q.bootFromTemplate(); err != nil { - return err - } - } - if q.config.VirtioMem { err = q.setupVirtioMem(ctx) } @@ -1204,7 +1194,33 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { return err } -func (q *qemu) bootFromTemplate() error { +// RestoreVM brings up the VM and restores its device state from a snapshot +// previously written to snapshotDir by SaveVM. QEMU must be launched waiting +// for an incoming migration (-incoming defer) for the restore to succeed, so +// RestoreVM configures the incoming migration, launches the VM via StartVM and +// then performs the migration-incoming from the snapshot. The restored VM is +// left paused; the caller decides when to ResumeVM and what post-restore +// housekeeping to perform. +func (q *qemu) RestoreVM(ctx context.Context, snapshotDir string) error { + span, ctx := katatrace.Trace(ctx, q.Logger(), "RestoreVM", qemuTracingTags, map[string]string{"sandbox_id": q.id}) + defer span.End() + + // Launch QEMU waiting for an incoming migration so it does not boot the + // guest until the device state has been loaded. The memory configuration + // (file-backed, private) was already set up by CreateVM from + // FileBackedMemory. 
+ q.qemuConfig.Incoming = govmmQemu.Incoming{MigrationType: govmmQemu.MigrationDefer} + + if err := q.StartVM(ctx, VmStartTimeout); err != nil { + return err + } + + return q.restoreDeviceState(snapshotDir) +} + +// restoreDeviceState performs the QMP migration-incoming that loads the VM +// device state saved by SaveVM into the (already launched) QEMU process. +func (q *qemu) restoreDeviceState(snapshotDir string) error { if err := q.qmpSetup(); err != nil { return err } @@ -1215,7 +1231,7 @@ func (q *qemu) bootFromTemplate() error { q.Logger().WithError(err).Error("set migration ignore shared memory") return err } - uri := fmt.Sprintf("exec:cat %s", q.config.DevicesStatePath) + uri := fmt.Sprintf("exec:cat %s", filepath.Join(snapshotDir, qemuDeviceStateFile)) err = q.qmpMonitorCh.qmp.ExecuteMigrationIncoming(q.qmpMonitorCh.ctx, uri) if err != nil { return err @@ -2374,16 +2390,17 @@ func (q *qemu) GetVMConsole(ctx context.Context, id string) (string, string, err return consoleProtoUnix, consoleURL, nil } -func (q *qemu) SaveVM() error { +func (q *qemu) SaveVM(snapshotDir string) error { q.Logger().Info("Save sandbox") if err := q.qmpSetup(); err != nil { return err } - // BootToBeTemplate sets the VM to be a template that other VMs can clone from. We would want to - // bypass shared memory when saving the VM to a local file through migration exec. - if q.config.BootToBeTemplate { + // When the guest memory is backed by a shared file (e.g. a template + // source), bypass the shared memory when saving the VM to a local file + // through migration exec. 
+ if q.config.FileBackedMemory != nil && q.config.FileBackedMemory.Shared { err := q.arch.setIgnoreSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp) if err != nil { q.Logger().WithError(err).Error("set migration ignore shared memory") @@ -2391,7 +2408,8 @@ func (q *qemu) SaveVM() error { } } - err := q.qmpMonitorCh.qmp.ExecSetMigrateArguments(q.qmpMonitorCh.ctx, fmt.Sprintf("%s>%s", qmpExecCatCmd, q.config.DevicesStatePath)) + deviceStatePath := filepath.Join(snapshotDir, qemuDeviceStateFile) + err := q.qmpMonitorCh.qmp.ExecSetMigrateArguments(q.qmpMonitorCh.ctx, fmt.Sprintf("%s>%s", qmpExecCatCmd, deviceStatePath)) if err != nil { q.Logger().WithError(err).Error("exec migration") return err diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 6350ebb3b69f..58c35eb150e2 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -27,8 +27,6 @@ type qemuAmd64 struct { snpGuest bool - vmFactory bool - devLoadersCount uint32 sgxEPCSize int64 @@ -97,11 +95,6 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { return nil, fmt.Errorf("unrecognised machinetype: %v", machineType) } - factory := false - if config.BootToBeTemplate || config.BootFromTemplate { - factory = true - } - // IOMMU and Guest Protection require a split IRQ controller for handling interrupts // otherwise QEMU won't be able to create the kernel irqchip if config.IOMMU || config.ConfidentialGuest { @@ -128,7 +121,6 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { protection: noneProtection, legacySerial: config.LegacySerial, }, - vmFactory: factory, snpGuest: config.SevSnpGuest, qgsPort: config.QgsPort, snpIdBlock: config.SnpIdBlock, diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index f8cb53844b89..61d1138c3851 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ 
b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -84,14 +84,6 @@ func TestQemuAmd64CPUModel(t *testing.T) { expectedOut := defaultCPUModel model := amd64.cpuModel() assert.Equal(expectedOut, model) - - amd64.disableNestingChecks() - base, ok := amd64.(*qemuAmd64) - assert.True(ok) - base.vmFactory = true - expectedOut = defaultCPUModel - model = amd64.cpuModel() - assert.Equal(expectedOut, model) } func TestQemuAmd64MemoryTopology(t *testing.T) { diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 53b0ff716cf3..71ae38957647 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -125,13 +125,13 @@ func TestQemuCreateVM(t *testing.T) { config9.HugePages = false config10 := newQemuConfig() - config10.BootToBeTemplate = true + config10.FileBackedMemory = &FileBackedMemoryConfig{Shared: true} config11 := newQemuConfig() - config11.BootFromTemplate = true + config11.FileBackedMemory = &FileBackedMemoryConfig{Shared: false} config12 := newQemuConfig() - config12.BootToBeTemplate = true + config12.FileBackedMemory = &FileBackedMemoryConfig{Shared: true} config12.SharedFS = config.VirtioFS config13 := newQemuConfig() @@ -216,7 +216,7 @@ func TestQemuCreateVM(t *testing.T) { err = q.setupVirtioMem(context.Background()) assert.Error(err) - err = q.SaveVM() + err = q.SaveVM("/tmp") assert.Error(err) err = q.StopVM(context.Background(), true) @@ -621,9 +621,8 @@ func TestQemuFileBackedMem(t *testing.T) { RunStorePath: sandbox.store.RunStoragePath(), }, } - sandbox.config.HypervisorConfig.BootToBeTemplate = true + sandbox.config.HypervisorConfig.FileBackedMemory = &FileBackedMemoryConfig{Path: fallbackFileBackedMemDir, Shared: true} sandbox.config.HypervisorConfig.SharedFS = config.VirtioFS - sandbox.config.HypervisorConfig.MemoryPath = fallbackFileBackedMemDir err = q.CreateVM(context.Background(), sandbox.id, network, &sandbox.config.HypervisorConfig) diff --git 
a/src/runtime/virtcontainers/remote.go b/src/runtime/virtcontainers/remote.go index d88a3ca4e909..c73ad4b9c7ff 100644 --- a/src/runtime/virtcontainers/remote.go +++ b/src/runtime/virtcontainers/remote.go @@ -193,10 +193,14 @@ func (rh *remoteHypervisor) PauseVM(ctx context.Context) error { return notImplemented("PauseVM") } -func (rh *remoteHypervisor) SaveVM() error { +func (rh *remoteHypervisor) SaveVM(snapshotDir string) error { return notImplemented("SaveVM") } +func (rh *remoteHypervisor) RestoreVM(ctx context.Context, snapshotDir string) error { + return notImplemented("RestoreVM") +} + func (rh *remoteHypervisor) ResumeVM(ctx context.Context) error { return notImplemented("ResumeVM") } diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go index 54bb39346838..bc711539424c 100644 --- a/src/runtime/virtcontainers/stratovirt.go +++ b/src/runtime/virtcontainers/stratovirt.go @@ -1111,7 +1111,13 @@ func (s *stratovirt) PauseVM(ctx context.Context) error { return nil } -func (s *stratovirt) SaveVM() error { +func (s *stratovirt) SaveVM(snapshotDir string) error { + // StratoVirt does not support snapshot/restore in this implementation. + return nil +} + +func (s *stratovirt) RestoreVM(ctx context.Context, snapshotDir string) error { + // StratoVirt does not support snapshot/restore in this implementation. 
return nil } diff --git a/src/runtime/virtcontainers/virtframework.go b/src/runtime/virtcontainers/virtframework.go index 9196e291c2a4..26eea833cb46 100644 --- a/src/runtime/virtcontainers/virtframework.go +++ b/src/runtime/virtcontainers/virtframework.go @@ -38,7 +38,11 @@ func (vfw *virtFramework) PauseVM(ctx context.Context) error { return nil } -func (vfw *virtFramework) SaveVM() error { +func (vfw *virtFramework) SaveVM(snapshotDir string) error { + return nil +} + +func (vfw *virtFramework) RestoreVM(ctx context.Context, snapshotDir string) error { return nil } diff --git a/src/runtime/virtcontainers/vm.go b/src/runtime/virtcontainers/vm.go index 8c60a8980d1e..a33493399a7f 100644 --- a/src/runtime/virtcontainers/vm.go +++ b/src/runtime/virtcontainers/vm.go @@ -84,6 +84,21 @@ func GrpcToVMConfig(j *pb.GrpcVMConfig) (*VMConfig, error) { // NewVM creates a new VM based on provided VMConfig. func NewVM(ctx context.Context, config VMConfig) (*VM, error) { + return newVM(ctx, config, "") +} + +// NewVMFromSnapshot creates a VM by restoring it from a snapshot previously +// written to snapshotDir. The returned VM is in a paused state; the caller is +// responsible for resuming it and performing any post-restore housekeeping +// (e.g. reseeding the RNG, syncing the guest clock). +func NewVMFromSnapshot(ctx context.Context, config VMConfig, snapshotDir string) (*VM, error) { + return newVM(ctx, config, snapshotDir) +} + +// newVM creates a new VM based on the provided VMConfig. When +// restoreSnapshotDir is non-empty the VM is restored from the snapshot in that +// directory (and left paused) instead of being booted fresh. +func newVM(ctx context.Context, config VMConfig, restoreSnapshotDir string) (*VM, error) { // 1. setup hypervisor hypervisor, err := NewHypervisor(config.HypervisorType) if err != nil { @@ -134,9 +149,15 @@ func NewVM(ctx context.Context, config VMConfig) (*VM, error) { return nil, err } - // 3. 
boot up guest vm - if err = hypervisor.StartVM(ctx, VmStartTimeout); err != nil { - return nil, err + // 3. boot up (or restore) the guest vm + if restoreSnapshotDir != "" { + if err = hypervisor.RestoreVM(ctx, restoreSnapshotDir); err != nil { + return nil, err + } + } else { + if err = hypervisor.StartVM(ctx, VmStartTimeout); err != nil { + return nil, err + } } defer func() { @@ -147,8 +168,8 @@ func NewVM(ctx context.Context, config VMConfig) (*VM, error) { }() // 4. Check agent aliveness - // VMs booted from template are paused, do not Check - if !config.HypervisorConfig.BootFromTemplate { + // Restored VMs (e.g. clones from a template) are paused, do not Check + if restoreSnapshotDir == "" { virtLog.WithField("vm", id).Info("Check agent status") err = agent.check(ctx) if err != nil { @@ -223,10 +244,10 @@ func (v *VM) Pause(ctx context.Context) error { return v.hypervisor.PauseVM(ctx) } -// Save saves a VM to persistent disk. -func (v *VM) Save() error { +// Save snapshots a VM into snapshotDir. +func (v *VM) Save(snapshotDir string) error { v.logger().Info("Save vm") - return v.hypervisor.SaveVM() + return v.hypervisor.SaveVM(snapshotDir) } // Resume resumes a paused VM. 
diff --git a/src/runtime/virtcontainers/vm_test.go b/src/runtime/virtcontainers/vm_test.go index f37cd9715561..03ab73b9967c 100644 --- a/src/runtime/virtcontainers/vm_test.go +++ b/src/runtime/virtcontainers/vm_test.go @@ -46,7 +46,7 @@ func TestNewVM(t *testing.T) { assert.Nil(err) err = vm.Disconnect(context.Background()) assert.Nil(err) - err = vm.Save() + err = vm.Save(testDir) assert.Nil(err) err = vm.Stop(context.Background()) assert.Nil(err) @@ -71,16 +71,8 @@ func TestNewVM(t *testing.T) { err = vm.ReseedRNG(context.Background()) assert.Nil(err) - // template VM - config.HypervisorConfig.BootFromTemplate = true - _, err = NewVM(ctx, config) - assert.Error(err) - - config.HypervisorConfig.MemoryPath = testDir - _, err = NewVM(ctx, config) - assert.Error(err) - - config.HypervisorConfig.DevicesStatePath = testDir - _, err = NewVM(ctx, config) + // restore a VM from a snapshot + vmFromSnapshot, err := NewVMFromSnapshot(ctx, config, testDir) assert.Nil(err) + assert.NotNil(vmFromSnapshot) }