diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index a43ac276362e..84e240aa350b 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -26,6 +26,7 @@ import ( "github.com/ceph/ceph-csi/internal/cephfs" "github.com/ceph/ceph-csi/internal/controller" "github.com/ceph/ceph-csi/internal/controller/persistentvolume" + "github.com/ceph/ceph-csi/internal/controller/volumegroup" "github.com/ceph/ceph-csi/internal/liveness" nfsdriver "github.com/ceph/ceph-csi/internal/nfs/driver" rbddriver "github.com/ceph/ceph-csi/internal/rbd/driver" @@ -289,6 +290,7 @@ func setPIDLimit(conf *util.Config) { func initControllers() { // Add list of controller here. persistentvolume.Init() + volumegroup.Init() } func validateCloneDepthFlag(conf *util.Config) { diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 37066045e914..b002306e41ee 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -20,11 +20,16 @@ import ( "github.com/ceph/ceph-csi/internal/util/log" + apiruntime "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/leaderelection/resourcelock" clientConfig "sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/manager/signals" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + replicationv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/apis/replication.storage/v1alpha1" ) // Manager is the interface that will wrap Add function. @@ -60,6 +65,9 @@ func addToManager(mgr manager.Manager, config Config) error { // Start will start all the registered managers. func Start(config Config) error { + scheme := apiruntime.NewScheme() + utilruntime.Must(replicationv1alpha1.AddToScheme(scheme)) + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) electionID := config.DriverName + "-" + config.Namespace opts := manager.Options{ LeaderElection: true, @@ -68,6 +76,7 @@ func Start(config Config) error { LeaderElectionNamespace: config.Namespace, LeaderElectionResourceLock: resourcelock.LeasesResourceLock, LeaderElectionID: electionID, + Scheme: scheme, } mgr, err := manager.New(clientConfig.GetConfigOrDie(), opts) if err != nil { diff --git a/internal/controller/volumegroup/volumegroupreplicationcontent.go b/internal/controller/volumegroup/volumegroupreplicationcontent.go new file mode 100644 index 000000000000..e0a07662329e --- /dev/null +++ b/internal/controller/volumegroup/volumegroupreplicationcontent.go @@ -0,0 +1,211 @@ +/* +Copyright 2024 The Ceph-CSI Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package volumegroup + +import ( + "context" + "errors" + "fmt" + + ctrl "github.com/ceph/ceph-csi/internal/controller" + "github.com/ceph/ceph-csi/internal/rbd" + "github.com/ceph/ceph-csi/internal/util" + "github.com/ceph/ceph-csi/internal/util/log" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + replicationv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/apis/replication.storage/v1alpha1" +) + +type ReconcileVGRContent struct { + client client.Client + config ctrl.Config + Locks *util.VolumeLocks +} + +var ( + _ reconcile.Reconciler = &ReconcileVGRContent{} + _ ctrl.Manager = &ReconcileVGRContent{} +) + +const ( + secretNameParameterName string = "replication.storage.openshift.io/group-replication-secret-name" + secretNamespaceParameterName string = "replication.storage.openshift.io/group-replication-secret-namespace" +) + +// Init will add the ReconcileVGRContent to the list. +func Init() { + // add ReconcileVGRContent to the list + ctrl.ControllerList = append(ctrl.ControllerList, &ReconcileVGRContent{}) +} + +// Add adds the newVGRContentReconciler. +func (r *ReconcileVGRContent) Add(mgr manager.Manager, config ctrl.Config) error { + return add(mgr, newVGRContentReconciler(mgr, config)) +} + +// newVGRContentReconciler returns a ReconcileVGRContent. +func newVGRContentReconciler(mgr manager.Manager, config ctrl.Config) reconcile.Reconciler { + r := &ReconcileVGRContent{ + client: mgr.GetClient(), + config: config, + Locks: util.NewVolumeLocks(), + } + + return r +} + +func add(mgr manager.Manager, r reconcile.Reconciler) error { + // Create a new controller + c, err := controller.New( + "vgrcontent-controller", + mgr, + controller.Options{MaxConcurrentReconciles: 1, Reconciler: r}) + if err != nil { + return err + } + + // Watch for changes to VolumeGroupReplicationContent + err = c.Watch(source.Kind( + mgr.GetCache(), + &replicationv1alpha1.VolumeGroupReplicationContent{}, + &handler.TypedEnqueueRequestForObject[*replicationv1alpha1.VolumeGroupReplicationContent]{}), + ) + if err != nil { + return fmt.Errorf("failed to watch the changes: %w", err) + } + + return nil +} + +func (r *ReconcileVGRContent) getSecrets( + ctx context.Context, + name, + namespace string, +) (map[string]string, error) { + if name == "" || namespace == "" { + errStr := "secret name or secret namespace is empty" + log.ErrorLogMsg(errStr) + + return nil, errors.New(errStr) + } + secret := &corev1.Secret{} + err := r.client.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret) + if err != nil { + return nil, fmt.Errorf("error getting secret %s in namespace %s: %w", name, namespace, err) + } + + secrets := map[string]string{} + for key, value := range secret.Data { + secrets[key] = string(value) + } + + return secrets, nil +} + +func (r *ReconcileVGRContent) reconcileVGRContent(ctx context.Context, obj runtime.Object) error { + vgrc, ok := obj.(*replicationv1alpha1.VolumeGroupReplicationContent) + if !ok { + return nil + } + if vgrc.Spec.Provisioner != r.config.DriverName { + return nil + } + + reqName := vgrc.Name + groupHandle := vgrc.Spec.VolumeGroupReplicationHandle + volumeIds := vgrc.Spec.Source.VolumeHandles + + vgrClass := &replicationv1alpha1.VolumeGroupReplicationClass{} + err := r.client.Get(ctx, types.NamespacedName{Name: vgrc.Spec.VolumeGroupReplicationClassName}, vgrClass) + if err != nil { + log.ErrorLogMsg("failed to get volumeGroupReplicationClass %s", err) + + return err + } + + if ok = r.Locks.TryAcquire(groupHandle); !ok { + return fmt.Errorf("failed to acquire lock for group handle %s", groupHandle) + } + defer r.Locks.Release(groupHandle) + + parameters := vgrClass.Spec.Parameters + secretName := vgrClass.Spec.Parameters[secretNameParameterName] + secretNamespace := vgrClass.Spec.Parameters[secretNamespaceParameterName] + + secrets, err := r.getSecrets(ctx, secretName, secretNamespace) + if err != nil { + log.ErrorLogMsg("failed to get credentials from secret %s", err) + + return err + } + + groupID, err := rbd.RegenerateVolumeGroupJournal( + groupHandle, + reqName, + r.config.InstanceID, + r.config.ClusterName, + volumeIds, + parameters, + secrets, + ) + if err != nil { + log.ErrorLogMsg("failed to regenerate volume group journal %s", err) + + return err + } + if groupID != groupHandle { + log.DebugLog(ctx, "groupHandle changed from %s to %s", groupHandle, groupID) + } + + return nil +} + +// Reconcile reconciles the VolumeGroupReplicationContent object and creates a new omap entries +// for the volume. +func (r *ReconcileVGRContent) Reconcile(ctx context.Context, + request reconcile.Request, +) (reconcile.Result, error) { + vgrc := &replicationv1alpha1.VolumeGroupReplicationContent{} + err := r.client.Get(ctx, request.NamespacedName, vgrc) + if err != nil { + if apierrors.IsNotFound(err) { + return reconcile.Result{}, nil + } + + return reconcile.Result{}, err + } + // Check if the object is under deletion + if !vgrc.GetDeletionTimestamp().IsZero() { + return reconcile.Result{}, nil + } + + err = r.reconcileVGRContent(ctx, vgrc) + if err != nil { + return reconcile.Result{}, err + } + + return reconcile.Result{}, nil +} diff --git a/internal/rbd/rbd_journal.go b/internal/rbd/rbd_journal.go index 44c951c53385..578923cc2588 100644 --- a/internal/rbd/rbd_journal.go +++ b/internal/rbd/rbd_journal.go @@ -22,6 +22,7 @@ import ( "fmt" "github.com/ceph/ceph-csi/internal/journal" + "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/k8s" "github.com/ceph/ceph-csi/internal/util/log" @@ -701,3 +702,143 @@ func (rv *rbdVolume) storeImageID(ctx context.Context, j *journal.Connection) er return nil } + +// RegenerateVolumeGroupJournal regenerate the omap data for the volume group. +// This performs the following operations: +// - extracts clusterID and Mons from the clusterID mapping +// - Retrieves pool and journalPool parameters from the VolumeGroupReplicationClass +// - Reserves omap data +// - Add volumeIDs mapping to the reserved volume group omap object +// - Generate new volume group handler +// +// Note: The new volume group handler will differ from the original as it includes +// poolID and clusterID, which vary between clusters. +func RegenerateVolumeGroupJournal( + groupID, + requestName, + instanceID, + clusterName string, + volumeIds []string, + parameters, + secrets map[string]string, +) (string, error) { + var ( + clusterID string + monitors string + pool string + journalPool string + namePrefix string + groupUUID string + vgName string + + gi util.CSIIdentifier + ok bool + err error + ) + ctx := context.Background() + + mgr, ok := (NewManager(instanceID, parameters, secrets)).(*rbdManager) + if !ok { + return "", errors.New("rbdManager doesn't implement the Manager interface") + } + defer mgr.Destroy(ctx) + + err = gi.DecomposeCSIID(groupID) + if err != nil { + return "", fmt.Errorf("%w: error decoding volume group ID (%w) (%s)", ErrInvalidVolID, err, groupID) + } + + monitors, clusterID, err = util.FetchMappedClusterIDAndMons(ctx, gi.ClusterID) + if err != nil { + return "", err + } + + pool, ok = mgr.parameters["pool"] + if !ok { + return "", errors.New("required 'pool' parameter missing in volume attributes") + } + + journalPool = mgr.parameters["journalPool"] + if journalPool == "" { + journalPool = pool + } + + vgJournal, err := mgr.getVolumeGroupJournal(clusterID) + if err != nil { + return "", err + } + defer vgJournal.Destroy() + + namePrefix = mgr.parameters["volumeNamePrefix"] + vgData, err := vgJournal.CheckReservation(ctx, journalPool, requestName, namePrefix) + if err != nil { + return "", err + } + + if vgData != nil && vgData.GroupUUID != "" { + groupUUID = vgData.GroupUUID + vgName = vgData.GroupName + } else { + log.DebugLog(ctx, "the journal does not contain a reservation for a volume group with name %q yet", requestName) + groupUUID, vgName, err = vgJournal.ReserveName(ctx, journalPool, requestName, gi.ObjectUUID, namePrefix) + if err != nil { + return "", fmt.Errorf("failed to reserve volume group for name %q: %w", requestName, err) + } + defer func() { + if err != nil { + err = vgJournal.UndoReservation(ctx, journalPool, vgName, requestName) + if err != nil { + log.ErrorLog(ctx, "failed to undo the reservation for volume group %q: %w", requestName, err) + } + } + }() + } + + volumes := make([]types.Volume, len(volumeIds)) + var volume types.Volume + for i, id := range volumeIds { + volume, err = mgr.GetVolumeByID(ctx, id) + if err != nil { + // free up allocated volumes + for _, v := range volumes { + v.Destroy(ctx) + } + + return "", fmt.Errorf("failed to find required volume %q for volume group id %q: %w", id, vgName, err) + } + + volumes[i] = volume + } + + var volID string + for _, vol := range volumes { + volID, err = vol.GetID(ctx) + if err != nil { + return "", fmt.Errorf("failed to get VolumeID for %q: %w", vol, err) + } + + toAdd := map[string]string{ + volID: "", + } + log.DebugLog(ctx, "adding volume mapping for volume %q to volume group %q", volID, vgName) + err = mgr.vgJournal.AddVolumesMapping(ctx, pool, gi.ObjectUUID, toAdd) + if err != nil { + return "", fmt.Errorf("failed to add mapping for volume %q to volume group %q: %w", volID, vgName, err) + } + } + + _, poolID, err := util.GetPoolIDs(ctx, monitors, journalPool, pool, mgr.creds) + if err != nil { + return "", fmt.Errorf("failed to get poolID for %q: %w", groupUUID, err) + } + + groupHandle, err := util.GenerateVolID(ctx, monitors, mgr.creds, poolID, pool, clusterID, groupUUID) + if err != nil { + return "", fmt.Errorf("failed to generate a unique CSI volume group with uuid for %q: %w", groupUUID, err) + } + + log.DebugLog(ctx, "re-generated Group ID (%s) and Group Name (%s) for request name (%s)", + groupHandle, vgName, requestName) + + return groupHandle, nil +}