Skip to content


Use Block CIM layers for container RootFS
Browse files Browse the repository at this point in the history
This commit adds the ability to parse block CIM layer mounts and to mount the merged block
CIMs to be used as a rootfs for a container.

Signed-off-by: Amit Barve <>
ambarve committed Dec 17, 2024
1 parent dd74204 commit f234e83
Showing 5 changed files with 325 additions and 56 deletions.
7 changes: 5 additions & 2 deletions internal/layers/helpers.go
Original file line number Diff line number Diff line change
@@ -75,8 +75,11 @@ const (
// parent layer CIMs
parentLayerCimPathsFlag = "parentCimPaths="

LegacyMountType string = "windows-layer"
CimFSMountType string = "CimFS"
legacyMountType string = "windows-layer"
forkedCIMMountType string = "CimFS"
blockCIMMountType string = "BlockCIM"
blockCIMTypeFlag string = "blockCIMType="
mergedCIMPathFlag string = "mergedCIMPath="

// getOptionAsArray finds if there is an option which has the given prefix and if such an
166 changes: 119 additions & 47 deletions internal/layers/wcow_mount.go
Original file line number Diff line number Diff line change
@@ -12,12 +12,14 @@ import (


hcsschema ""
@@ -37,6 +39,11 @@ func MountWCOWLayers(ctx context.Context, containerID string, vm *uvm.UtilityVM,
return mountProcessIsolatedForkedCimLayers(ctx, containerID, l)
return nil, nil, fmt.Errorf("hyperv isolated containers aren't supported with forked cim layers")
case *wcowBlockCIMLayers:
if vm == nil {
return mountProcessIsolatedBlockCIMLayers(ctx, containerID, l)
return nil, nil, fmt.Errorf("hyperv isolated containers aren't supported with block cim layers")
return nil, nil, fmt.Errorf("invalid layer type %T", wl)
@@ -171,53 +178,43 @@ func mountProcessIsolatedWCIFSLayers(ctx context.Context, l *wcowWCIFSLayers) (_
}, nil

// wcowHostForkedCIMLayerCloser is used to clean up forked CIM layers mounted on the host for
// process-isolated containers.
type wcowHostForkedCIMLayerCloser struct {
containerID string

func (l *wcowHostForkedCIMLayerCloser) Release(ctx context.Context) error {
mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath)
if err != nil {
return err

if err = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS); err != nil {
return err

if err = cimlayer.CleanupContainerMounts(l.containerID); err != nil {
return err
return wclayer.DeactivateLayer(ctx, l.scratchLayerPath)
// mountProcessIsolatedCimLayersCommon handles the processing that is common to mounting all
// 3 types of cimfs layers. This involves mounting the scratch layer, attaching the overlay
// filter and preparing the return values.
// `volume` is the path to the volume at which the read-only layer CIMs are mounted.
func mountProcessIsolatedCimLayersCommon(ctx context.Context, containerID string, volume string, s *scratchLayerData) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) {
ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedCimLayersCommon")
defer func() {
oc.SetSpanStatus(span, err)
trace.StringAttribute("scratch path", s.scratchLayerPath),
trace.StringAttribute("mounted CIM volume", volume))

func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string, l *wcowForkedCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) {
if err = wclayer.ActivateLayer(ctx, l.scratchLayerPath); err != nil {
return nil, nil, err
rcl := &resources.ResourceCloserList{}
defer func() {
if err != nil {
_ = wclayer.DeactivateLayer(ctx, l.scratchLayerPath)
if rErr := rcl.Release(ctx); rErr != nil {
log.G(ctx).WithError(err).Warnf("mount process isolated cim layers common, undo failed with: %s", rErr)

mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath)
if err != nil {
if err = wclayer.ActivateLayer(ctx, s.scratchLayerPath); err != nil {
return nil, nil, err
rcl.AddFunc(func(uCtx context.Context) error {
return wclayer.DeactivateLayer(uCtx, s.scratchLayerPath)

volume, err := cimlayer.MountForkedCimLayer(ctx, l.layers[0].cimPath, containerID)
mountPath, err := wclayer.GetLayerMountPath(ctx, s.scratchLayerPath)
if err != nil {
return nil, nil, fmt.Errorf("mount layer cim: %w", err)
return nil, nil, err
defer func() {
if err != nil {
_ = cimlayer.UnmountCimLayer(ctx, volume)
"scratch": s.scratchLayerPath,
"mounted path": mountPath,
}).Debug("scratch activated")

layerID, err := cimlayer.LayerID(volume)
if err != nil {
@@ -239,22 +236,97 @@ func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string
if err = computestorage.AttachOverlayFilter(ctx, mountPath, layerData); err != nil {
return nil, nil, err
rcl.AddFunc(func(uCtx context.Context) error {
return computestorage.DetachOverlayFilter(uCtx, mountPath, hcsschema.UnionFS)

log.G(ctx).WithField("layer data", layerData).Debug("unionFS filter attached")

return &MountedWCOWLayers{
RootFS: mountPath,
MountedLayerPaths: []MountedWCOWLayer{{
LayerID: layerID,
MountedPath: volume,
}, rcl, nil

func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string, l *wcowForkedCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) {
ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedForkedCimLayers")
defer func() {
oc.SetSpanStatus(span, err)

rcl := &resources.ResourceCloserList{}
defer func() {
if err != nil {
if rErr := rcl.Release(ctx); rErr != nil {
log.G(ctx).WithError(err).Warnf("mount process isolated forked CIM layers, undo failed with: %s", rErr)

volume, err := cimlayer.MountForkedCimLayer(ctx, l.layers[0].cimPath, containerID)
if err != nil {
return nil, nil, fmt.Errorf("mount forked layer cim: %w", err)
rcl.AddFunc(func(uCtx context.Context) error {
return cimlayer.UnmountCimLayer(uCtx, volume)

mountedLayers, closer, err := mountProcessIsolatedCimLayersCommon(ctx, containerID, volume, &l.scratchLayerData)
if err != nil {
return nil, nil, err
return mountedLayers, rcl.Add(closer), nil

func mountProcessIsolatedBlockCIMLayers(ctx context.Context, containerID string, l *wcowBlockCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) {
ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedBlockCIMLayers")
defer func() {
oc.SetSpanStatus(span, err)

var volume string

rcl := &resources.ResourceCloserList{}
defer func() {
if err != nil {
_ = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS)
if rErr := rcl.Release(ctx); rErr != nil {
log.G(ctx).WithError(err).Warnf("mount process isolated forked CIM layers, undo failed with: %s", rErr)

return &MountedWCOWLayers{
RootFS: mountPath,
MountedLayerPaths: []MountedWCOWLayer{{
LayerID: layerID,
MountedPath: volume,
}, &wcowHostForkedCIMLayerCloser{
containerID: containerID,
scratchLayerData: l.scratchLayerData,
}, nil
"scratch": l.scratchLayerPath,
"merged layer": l.mergedLayer,
"parent layers": l.parentLayers,
}).Debug("mounting process isolated block CIM layers")

if len(l.parentLayers) > 1 {
volume, err = cimlayer.MergeMountBlockCIMLayer(ctx, l.mergedLayer, l.parentLayers, containerID)
} else {
volume, err = cimlayer.MountBlockCIMLayer(ctx, l.parentLayers[0], containerID)
if err != nil {
return nil, nil, fmt.Errorf("mount block CIM layers: %w", err)
rcl.AddFunc(func(uCtx context.Context) error {
return cimlayer.UnmountCimLayer(uCtx, volume)

log.G(ctx).WithField("volume", volume).Debug("mounted blockCIM layers for process isolated container")

mountedLayers, layerCloser, err := mountProcessIsolatedCimLayersCommon(ctx, containerID, volume, &l.scratchLayerData)
if err != nil {
return nil, nil, fmt.Errorf("failed mount CIM layers common: %w", err)

return mountedLayers, rcl, nil

type wcowIsolatedWCIFSLayerCloser struct {
95 changes: 90 additions & 5 deletions internal/layers/wcow_parse.go
Original file line number Diff line number Diff line change
@@ -5,15 +5,18 @@ package layers

import (



// WCOW image layers is a tagging interface that all WCOW layers MUST implement. This is
@@ -67,6 +70,17 @@ type wcowForkedCIMLayers struct {
layers []forkedCIMLayer

// wcowBlockCIMLayers represents CIM layers where each layer is stored in a block device or
// in a single file, and multiple such layer CIMs are merged before they are mounted.
// Currently this can only be used for process-isolated containers.
type wcowBlockCIMLayers struct {
// parent layers in order [layerN (top-most), layerN-1,..layer0 (base)]
parentLayers []*cimfs.BlockCIM
// a merged layer is prepared by combining all parent layers
mergedLayer *cimfs.BlockCIM

func parseForkedCimMount(m *types.Mount) (*wcowForkedCIMLayers, error) {
parentLayerPaths, err := getOptionAsArray(m, parentLayerPathsFlag)
if err != nil {
@@ -94,8 +108,77 @@ func parseForkedCimMount(m *types.Mount) (*wcowForkedCIMLayers, error) {
}, nil

// ParseWCOWLayers parses the layers provided by containerd into the format understood by hcsshim and prepares
// them for mounting.
// TODO(ambarve): The code to parse a mount type should be in a separate package/module
// somewhere and then should be consumed by both hcsshim & containerd from there.
func parseBlockCIMMount(m *types.Mount) (*wcowBlockCIMLayers, error) {
var (
parentPaths []string
layerType cimfs.BlockCIMType
mergedCIMPath string

for _, option := range m.Options {
if val, ok := strings.CutPrefix(option, parentLayerCimPathsFlag); ok {
err := json.Unmarshal([]byte(val), &parentPaths)
if err != nil {
return nil, err
} else if val, ok = strings.CutPrefix(option, blockCIMTypeFlag); ok {
if val == "device" {
layerType = cimfs.BlockCIMTypeDevice
} else if val == "file" {
layerType = cimfs.BlockCIMTypeSingleFile
} else {
return nil, fmt.Errorf("invalid block CIM type `%s`", val)
} else if val, ok = strings.CutPrefix(option, mergedCIMPathFlag); ok {
mergedCIMPath = val

if len(parentPaths) == 0 {
return nil, fmt.Errorf("need at least 1 parent layer")
if layerType == cimfs.BlockCIMTypeNone {
return nil, fmt.Errorf("BlockCIM type not provided")
if mergedCIMPath == "" && len(parentPaths) > 1 {
return nil, fmt.Errorf("merged CIM path not provided")

var (
parentLayers []*cimfs.BlockCIM
mergedLayer *cimfs.BlockCIM

if len(parentPaths) > 1 {
// for single parent layers merge won't be done
mergedLayer = &cimfs.BlockCIM{
Type: layerType,
BlockPath: filepath.Dir(mergedCIMPath),
CimName: filepath.Base(mergedCIMPath),

for _, p := range parentPaths {
parentLayers = append(parentLayers, &cimfs.BlockCIM{
Type: layerType,
BlockPath: filepath.Dir(p),
CimName: filepath.Base(p),

return &wcowBlockCIMLayers{
scratchLayerData: scratchLayerData{
scratchLayerPath: m.Source,
parentLayers: parentLayers,
mergedLayer: mergedLayer,
}, nil

// ParseWCOWLayers parses the layers provided by containerd into the format understood by
// hcsshim and prepares them for mounting.
func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers, error) {
if err := validateRootfsAndLayers(rootfs, layerFolders); err != nil {
return nil, err
@@ -112,7 +195,7 @@ func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers,

m := rootfs[0]
switch m.Type {
case LegacyMountType:
case legacyMountType:
parentLayers, err := getOptionAsArray(m, parentLayerPathsFlag)
if err != nil {
return nil, err
@@ -123,8 +206,10 @@ func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers,
layerPaths: parentLayers,
}, nil
case CimFSMountType:
case forkedCIMMountType:
return parseForkedCimMount(m)
case blockCIMMountType:
return parseBlockCIMMount(m)
return nil, fmt.Errorf("invalid windows mount type: '%s'", m.Type)
@@ -146,7 +231,7 @@ func GetWCOWUVMBootFilesFromLayers(ctx context.Context, rootfs []*types.Mount, l
} else {
m := rootfs[0]
switch m.Type {
case LegacyMountType:
case legacyMountType:
parentLayers, err = getOptionAsArray(m, parentLayerPathsFlag)
if err != nil {
return nil, err
24 changes: 24 additions & 0 deletions internal/resources/resources.go
Original file line number Diff line number Diff line change
@@ -168,3 +168,27 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all
return nil

type ResourceCloserList struct {
closers []ResourceCloser

func (l *ResourceCloserList) Add(rOp ResourceCloser) *ResourceCloserList {
l.closers = append(l.closers, rOp)
return l

func (l *ResourceCloserList) AddFunc(rOp ResourceCloserFunc) *ResourceCloserList {
l.closers = append(l.closers, rOp)
return l

func (l *ResourceCloserList) Release(ctx context.Context) error {
// MUST release in the reverse order
for i := len(l.closers) - 1; i >= 0; i-- {
if oErr := l.closers[i].Release(ctx); oErr != nil {
return oErr
return nil
89 changes: 87 additions & 2 deletions internal/wclayer/cim/mount.go
Original file line number Diff line number Diff line change
@@ -6,11 +6,15 @@ import (

hcsschema ""
cimfs ""

var cimMountNamespace guid.GUID = guid.GUID{Data1: 0x6827367b, Data2: 0xc388, Data3: 0x4e9b, Data4: [8]byte{0x96, 0x1c, 0x6d, 0x2c, 0x93, 0x6c}}
@@ -25,13 +29,88 @@ func MountForkedCimLayer(ctx context.Context, cimPath, containerID string) (stri
return "", fmt.Errorf("generated cim mount GUID: %w", err)

vol, err := cimfs.Mount(cimPath, volumeGUID, hcsschema.CimMountFlagCacheFiles)
vol, err := cimfs.Mount(cimPath, volumeGUID, 0)
if err != nil {
return "", err
return vol, nil

// MountBlockCIMLayer mounts the given block CIM and returns the mount location of that
// CIM. The containerID is used to generate the GUID of the volume at which this CIM is
// mounted, so that if the shim process crashes for any reason, the mounted CIM can be
// correctly cleaned up during the `shim delete` call.
func MountBlockCIMLayer(ctx context.Context, layer *cimfs.BlockCIM, containerID string) (_ string, err error) {
ctx, span := oc.StartSpan(ctx, "MountBlockCIMLayer")
defer func() {
oc.SetSpanStatus(span, err)
trace.StringAttribute("layer", layer.String()))

var mountFlags uint32
switch layer.Type {
case cimfs.BlockCIMTypeDevice:
mountFlags |= cimfs.CimMountBlockDeviceCim
case cimfs.BlockCIMTypeSingleFile:
mountFlags |= cimfs.CimMountSingleFileCim
return "", fmt.Errorf("invalid BlockCIMType for merged layer: %w", os.ErrInvalid)

volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID))
if err != nil {
return "", fmt.Errorf("generated cim mount GUID: %w", err)

cimPath := filepath.Join(layer.BlockPath, layer.CimName)

"flags": mountFlags,
"volume": volumeGUID.String(),
}).Debug("mounting block layer CIM")

vol, err := cimfs.Mount(cimPath, volumeGUID, mountFlags)
if err != nil {
return "", err
return vol, nil

// MergeMountBlockCIMLayer mounts the given merged block CIM and returns the mount location
// of that CIM. The containerID is used to generate the GUID of the volume at which this
// CIM is mounted, so that if the shim process crashes for any reason, the mounted CIM can
// be correctly cleaned up during the `shim delete` call.
// parentLayers MUST be in base-to-topmost order, i.e., the base layer MUST be at index 0
// and the immediate parent MUST be at the last index.
// NOTE(review): the wcowBlockCIMLayers struct documents its parentLayers as top-most
// first, and that slice is passed to this function unchanged - confirm which order is
// actually expected.
func MergeMountBlockCIMLayer(ctx context.Context, mergedLayer *cimfs.BlockCIM, parentLayers []*cimfs.BlockCIM, containerID string) (_ string, err error) {
_, span := oc.StartSpan(ctx, "MergeMountBlockCIMLayer")
defer func() {
oc.SetSpanStatus(span, err)
trace.StringAttribute("merged layer", mergedLayer.String()),
trace.StringAttribute("parent layers", fmt.Sprintf("%v", parentLayers)))

var mountFlags uint32
switch mergedLayer.Type {
case cimfs.BlockCIMTypeDevice:
mountFlags |= cimfs.CimMountBlockDeviceCim
case cimfs.BlockCIMTypeSingleFile:
mountFlags |= cimfs.CimMountSingleFileCim
return "", fmt.Errorf("invalid BlockCIMType for merged layer: %w", os.ErrInvalid)

volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID))
if err != nil {
return "", fmt.Errorf("generated cim mount GUID: %w", err)
return cimfs.MountMergedBlockCIMs(mergedLayer, parentLayers, mountFlags, volumeGUID)

// UnmountCimLayer unmounts the CIM mounted at the given volume.
func UnmountCimLayer(ctx context.Context, volume string) error {
return cimfs.Unmount(volume)
@@ -44,6 +123,12 @@ func CleanupContainerMounts(containerID string) error {

volPath := fmt.Sprintf("\\\\?\\Volume{%s}\\", volumeGUID.String())

"volume": volPath,
"containerID": containerID,
}).Debug("cleanup container CIM mounts")

if _, err := os.Stat(volPath); err == nil {
err = cimfs.Unmount(volPath)
if err != nil {

0 comments on commit f234e83

Please sign in to comment.