Skip to content

Commit a323bad

Browse files
committed
Don't set an ownerRef on secrets that users are likely to copy around
A k8s bug (kubernetes/kubernetes#65200) may cause the k8s garbage collector to delete undesired resources when users manually copy an operator-managed secret to another namespace. To avoid that situation, this commit ensures no ownerRef is set on the subset of managed secrets that users are likely to copy around:
- the elastic user password secret
- Elasticsearch public transport certs
- Elasticsearch, Kibana, Enterprise Search, and APM Server public HTTP certs

Existing ownerReferences set by earlier ECK versions are removed when the secrets are reconciled.

Since they no longer have an ownerRef, those secrets are not automatically deleted when the Elasticsearch resource is deleted. To work around that, the secret reconciliation logic adds an additional set of labels to the reconciled secrets that don't have an ownerRef specified. These labels reference the "soft" owner ("soft" as in handled through custom code rather than through the k8s built-in garbage collection logic). Once a controller receives a deletion event for the resource it manages, it automatically removes the soft-owned secrets. This is done on a best-effort basis. Secrets will remain orphaned if:
- the operator is not running when the owner is deleted
- an error happens while deleting the soft-owned secrets
1 parent 8bedd23 commit a323bad

File tree

9 files changed

+206
-31
lines changed

9 files changed

+206
-31
lines changed

pkg/controller/apmserver/controller.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,12 @@ func (r *ReconcileApmServer) onDelete(obj types.NamespacedName) {
308308
r.dynamicWatches.Secrets.RemoveHandlerForKey(keystore.SecureSettingsWatchName(obj))
309309
// Clean up watches set on custom http tls certificates
310310
r.dynamicWatches.Secrets.RemoveHandlerForKey(certificates.CertificateWatchKey(Namer, obj.Name))
311+
if err := reconciler.GarbageCollectSoftOwnedSecrets(r.Client, obj); err != nil {
312+
// this is best-effort only, some secrets may remain orphan in case of error here,
313+
// or if the operator was down during the owner deletion
314+
log.Error(err, "namespace", obj.Namespace, "apm_name", obj.Name,
315+
"Failed to garbage collect secrets, they should be removed manually")
316+
}
311317
}
312318

313319
// reconcileApmServerToken reconciles a Secret containing the APM Server token.
@@ -333,7 +339,9 @@ func reconcileApmServerToken(c k8s.Client, as *apmv1.ApmServer) (corev1.Secret,
333339
expectedApmServerSecret.Data[SecretTokenKey] = common.RandomBytes(24)
334340
}
335341

336-
return reconciler.ReconcileSecret(c, expectedApmServerSecret, as)
342+
// Don't set an ownerRef for the APM token secret, likely to be copied into different namespaces.
343+
// See https://github.com/elastic/cloud-on-k8s/issues/3986.
344+
return reconciler.ReconcileSecretNoOwnerRef(c, expectedApmServerSecret, as)
337345
}
338346

339347
func (r *ReconcileApmServer) updateStatus(ctx context.Context, state State) error {

pkg/controller/beat/controller.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,12 @@ func (r *ReconcileBeat) Reconcile(request reconcile.Request) (reconcile.Result,
152152
}
153153

154154
if beat.IsMarkedForDeletion() {
155+
if err := reconciler.GarbageCollectSoftOwnedSecrets(r.Client, request.NamespacedName); err != nil {
156+
// this is best-effort only, some secrets may remain orphan in case of error here,
157+
// or if the operator was down during the owner deletion
158+
log.Error(err, "namespace", beat.Namespace, "beat_name", beat.Name,
159+
"Failed to garbage collect secrets, they should be removed manually")
160+
}
155161
return reconcile.Result{}, nil
156162
}
157163

pkg/controller/common/certificates/http_reconcile.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ func (r Reconciler) ReconcilePublicHTTPCerts(internalCerts *CertificatesSecret)
4545
expected.Data[CAFileName] = caPem
4646
}
4747

48-
_, err := reconciler.ReconcileSecret(r.K8sClient, expected, r.Object)
48+
// Don't set an ownerRef for public http certs secrets, likely to be copied into different namespaces.
49+
// See https://github.com/elastic/cloud-on-k8s/issues/3986.
50+
_, err := reconciler.ReconcileSecretNoOwnerRef(r.K8sClient, expected, r.Object)
4951
return err
5052
}
5153

pkg/controller/common/reconciler/secret.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,17 @@ import (
1010
"github.com/elastic/cloud-on-k8s/pkg/utils/k8s"
1111
"github.com/elastic/cloud-on-k8s/pkg/utils/maps"
1212
corev1 "k8s.io/api/core/v1"
13+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1314
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15+
"k8s.io/apimachinery/pkg/types"
16+
"sigs.k8s.io/controller-runtime/pkg/client"
17+
)
18+
19+
// Labels set on secrets which cannot rely on owner references due to https://github.com/kubernetes/kubernetes/issues/65200,
20+
// but should still be garbage-collected (best-effort) by the operator upon owner deletion.
21+
const (
22+
// SoftOwnerNamespaceLabel is the label key whose value is the namespace of the secret's soft owner.
SoftOwnerNamespaceLabel = "eck.k8s.elastic.co/soft-owner-namespace"
23+
// SoftOwnerNameLabel is the label key whose value is the name of the secret's soft owner.
SoftOwnerNameLabel = "eck.k8s.elastic.co/soft-owner-name"
1424
)
1525

1626
// ReconcileSecret creates or updates the actual secret to match the expected one.
@@ -41,3 +51,120 @@ func ReconcileSecret(c k8s.Client, expected corev1.Secret, owner metav1.Object)
4151
}
4252
return reconciled, nil
4353
}
54+
55+
// ReconcileSecretNoOwnerRef should be called to reconcile a Secret for which we explicitly don't want
56+
// an owner reference to be set, and want existing ownerRefs from previous operator versions to be removed,
57+
// because of this k8s bug: https://github.com/kubernetes/kubernetes/issues/65200 (fixed in k8s 1.20).
58+
//
59+
// It makes sense to use this function for secrets which are likely to be manually
60+
// copied into other namespaces by the end user.
61+
// Because of the k8s bug mentioned above, the ownerRef could trigger a racy garbage collection
62+
// that deletes all children resources, potentially resulting in data loss.
63+
// See https://github.com/elastic/cloud-on-k8s/issues/3986 for more details.
64+
//
65+
// Since they won't have an ownerRef specified, reconciled secrets will not be deleted automatically on parent deletion.
66+
// To account for that, we add a label for best-effort garbage collection by the operator on parent resource deletion.
67+
func ReconcileSecretNoOwnerRef(c k8s.Client, expected corev1.Secret, theoreticalOwner metav1.Object) (corev1.Secret, error) {
68+
// this function is similar to "ReconcileSecret", but:
69+
// - we don't pass an owner
70+
// - we remove the existing owner
71+
// - we set additional labels to perform garbage collection on owner deletion (best-effort)
72+
expected.Labels[SoftOwnerNamespaceLabel] = theoreticalOwner.GetNamespace()
73+
expected.Labels[SoftOwnerNameLabel] = theoreticalOwner.GetName()
74+
75+
var reconciled corev1.Secret
76+
if err := ReconcileResource(Params{
77+
Client: c,
78+
Owner: nil,
79+
Expected: &expected,
80+
Reconciled: &reconciled,
81+
NeedsUpdate: func() bool {
82+
// update if expected labels and annotations are not there
83+
return !maps.IsSubset(expected.Labels, reconciled.Labels) ||
84+
!maps.IsSubset(expected.Annotations, reconciled.Annotations) ||
85+
// or if secret data is not strictly equal
86+
!reflect.DeepEqual(expected.Data, reconciled.Data) ||
87+
// or if an existing owner should be removed
88+
hasOwner(&reconciled, theoreticalOwner)
89+
},
90+
UpdateReconciled: func() {
91+
// set expected annotations and labels, but don't remove existing ones
92+
// that may have been defaulted or set by the user on the existing resource
93+
reconciled.Labels = maps.Merge(reconciled.Labels, expected.Labels)
94+
reconciled.Annotations = maps.Merge(reconciled.Annotations, expected.Annotations)
95+
reconciled.Data = expected.Data
96+
// remove existing owner
97+
removeOwner(&reconciled, theoreticalOwner)
98+
},
99+
}); err != nil {
100+
return corev1.Secret{}, err
101+
}
102+
return reconciled, nil
103+
}
104+
105+
// GarbageCollectSoftOwnedSecrets deletes all secrets whose labels reference a soft owner.
106+
// To be called once that owner gets deleted.
107+
func GarbageCollectSoftOwnedSecrets(c k8s.Client, deletedParent types.NamespacedName) error {
108+
var secrets corev1.SecretList
109+
if err := c.List(
110+
&secrets,
111+
// restrict to secrets in the parent namespace, we don't want to delete
112+
// secrets users may have manually copied into other namespaces
113+
client.InNamespace(deletedParent.Namespace),
114+
// restrict to secrets on which we set the soft owner label
115+
client.MatchingLabels{
116+
SoftOwnerNamespaceLabel: deletedParent.Namespace,
117+
SoftOwnerNameLabel: deletedParent.Name,
118+
},
119+
); err != nil {
120+
return err
121+
}
122+
for i := range secrets.Items {
123+
s := secrets.Items[i]
124+
log.Info("Garbage collecting secret", "namespace", deletedParent.Namespace, "secret_name", s.Name)
125+
err := c.Delete(&s)
126+
if apierrors.IsNotFound(err) {
127+
// already deleted, all good
128+
continue
129+
}
130+
if err != nil {
131+
return err
132+
}
133+
}
134+
return nil
135+
}
136+
137+
func hasOwner(resource metav1.Object, owner metav1.Object) bool {
138+
if owner == nil || resource == nil {
139+
return false
140+
}
141+
found, _ := findOwner(resource, owner)
142+
return found
143+
}
144+
145+
func removeOwner(resource metav1.Object, owner metav1.Object) {
146+
if resource == nil || owner == nil {
147+
return
148+
}
149+
found, index := findOwner(resource, owner)
150+
if !found {
151+
return
152+
}
153+
owners := resource.GetOwnerReferences()
154+
// remove the owner at index i from the slice
155+
newOwners := append(owners[:index], owners[index+1:]...)
156+
resource.SetOwnerReferences(newOwners)
157+
}
158+
159+
func findOwner(resource metav1.Object, owner metav1.Object) (found bool, index int) {
160+
if owner == nil || resource == nil {
161+
return false, 0
162+
}
163+
ownerRefs := resource.GetOwnerReferences()
164+
for i := range ownerRefs {
165+
if ownerRefs[i].Name == owner.GetName() && ownerRefs[i].UID == owner.GetUID() {
166+
return true, i
167+
}
168+
}
169+
return false, 0
170+
}

pkg/controller/elasticsearch/certificates/transport/public_secret.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ func ReconcileTransportCertsPublicSecret(
3232
certificates.CAFileName: certificates.EncodePEMCert(ca.Cert.Raw),
3333
},
3434
}
35-
_, err := reconciler.ReconcileSecret(c, expected, &es)
35+
36+
// Don't set an ownerRef for public transport certs secrets, likely to be copied into different namespaces.
37+
// See https://github.com/elastic/cloud-on-k8s/issues/3986.
38+
_, err := reconciler.ReconcileSecretNoOwnerRef(c, expected, &es)
3639
return err
3740
}
3841

pkg/controller/elasticsearch/elasticsearch_controller.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,4 +321,10 @@ func (r *ReconcileElasticsearch) onDelete(es types.NamespacedName) {
321321
r.dynamicWatches.Secrets.RemoveHandlerForKey(certificates.CertificateWatchKey(esv1.ESNamer, es.Name))
322322
r.dynamicWatches.Secrets.RemoveHandlerForKey(user.UserProvidedRolesWatchName(es))
323323
r.dynamicWatches.Secrets.RemoveHandlerForKey(user.UserProvidedFileRealmWatchName(es))
324+
if err := reconciler.GarbageCollectSoftOwnedSecrets(r.Client, es); err != nil {
325+
// this is best-effort only, some secrets may remain orphan in case of error here,
326+
// or if the operator was down during the owner deletion
327+
log.Error(err, "namespace", es.Namespace, "es_name", es.Name,
328+
"Failed to garbage collect secrets, they should be removed manually")
329+
}
324330
}

pkg/controller/elasticsearch/user/predefined.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ func reconcileElasticUser(c k8s.Client, es esv1.Elasticsearch, existingFileRealm
3939
{Name: ElasticUserName, Roles: []string{SuperUserBuiltinRole}},
4040
},
4141
esv1.ElasticUserSecret(es.Name),
42+
// Don't set an ownerRef for the elastic user secret, likely to be copied into different namespaces.
43+
// See https://github.com/elastic/cloud-on-k8s/issues/3986.
44+
false,
4245
)
4346
}
4447

@@ -52,7 +55,9 @@ func reconcileInternalUsers(c k8s.Client, es esv1.Elasticsearch, existingFileRea
5255
{Name: ControllerUserName, Roles: []string{SuperUserBuiltinRole}},
5356
{Name: ProbeUserName, Roles: []string{ProbeUserRole}},
5457
},
55-
esv1.InternalUsersSecret(es.Name))
58+
esv1.InternalUsersSecret(es.Name),
59+
true,
60+
)
5661
}
5762

5863
// reconcilePredefinedUsers reconciles a secret with the given name holding the given users.
@@ -63,6 +68,7 @@ func reconcilePredefinedUsers(
6368
existingFileRealm filerealm.Realm,
6469
users users,
6570
secretName string,
71+
setOwnerRef bool,
6672
) (users, error) {
6773
secretNsn := types.NamespacedName{Namespace: es.Namespace, Name: secretName}
6874

@@ -92,7 +98,11 @@ func reconcilePredefinedUsers(
9298
Data: secretData,
9399
}
94100

95-
_, err = reconciler.ReconcileSecret(c, expected, &es)
101+
if setOwnerRef {
102+
_, err = reconciler.ReconcileSecret(c, expected, &es)
103+
} else {
104+
_, err = reconciler.ReconcileSecretNoOwnerRef(c, expected, &es)
105+
}
96106
return users, err
97107
}
98108

pkg/controller/enterprisesearch/enterprisesearch_controller.go

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,6 @@ import (
1111
"reflect"
1212
"sync/atomic"
1313

14-
"go.elastic.co/apm"
15-
appsv1 "k8s.io/api/apps/v1"
16-
corev1 "k8s.io/api/core/v1"
17-
apierrors "k8s.io/apimachinery/pkg/api/errors"
18-
"k8s.io/apimachinery/pkg/types"
19-
"k8s.io/client-go/tools/record"
20-
"sigs.k8s.io/controller-runtime/pkg/controller"
21-
"sigs.k8s.io/controller-runtime/pkg/handler"
22-
logf "sigs.k8s.io/controller-runtime/pkg/log"
23-
"sigs.k8s.io/controller-runtime/pkg/manager"
24-
"sigs.k8s.io/controller-runtime/pkg/reconcile"
25-
"sigs.k8s.io/controller-runtime/pkg/source"
26-
2714
entv1beta1 "github.com/elastic/cloud-on-k8s/pkg/apis/enterprisesearch/v1beta1"
2815
"github.com/elastic/cloud-on-k8s/pkg/controller/association"
2916
"github.com/elastic/cloud-on-k8s/pkg/controller/common"
@@ -33,11 +20,24 @@ import (
3320
"github.com/elastic/cloud-on-k8s/pkg/controller/common/driver"
3421
"github.com/elastic/cloud-on-k8s/pkg/controller/common/events"
3522
"github.com/elastic/cloud-on-k8s/pkg/controller/common/operator"
23+
"github.com/elastic/cloud-on-k8s/pkg/controller/common/reconciler"
3624
"github.com/elastic/cloud-on-k8s/pkg/controller/common/tracing"
3725
"github.com/elastic/cloud-on-k8s/pkg/controller/common/version"
3826
"github.com/elastic/cloud-on-k8s/pkg/controller/common/watches"
3927
entName "github.com/elastic/cloud-on-k8s/pkg/controller/enterprisesearch/name"
4028
"github.com/elastic/cloud-on-k8s/pkg/utils/k8s"
29+
"go.elastic.co/apm"
30+
appsv1 "k8s.io/api/apps/v1"
31+
corev1 "k8s.io/api/core/v1"
32+
apierrors "k8s.io/apimachinery/pkg/api/errors"
33+
"k8s.io/apimachinery/pkg/types"
34+
"k8s.io/client-go/tools/record"
35+
"sigs.k8s.io/controller-runtime/pkg/controller"
36+
"sigs.k8s.io/controller-runtime/pkg/handler"
37+
logf "sigs.k8s.io/controller-runtime/pkg/log"
38+
"sigs.k8s.io/controller-runtime/pkg/manager"
39+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
40+
"sigs.k8s.io/controller-runtime/pkg/source"
4141
)
4242

4343
const (
@@ -185,6 +185,13 @@ func (r *ReconcileEnterpriseSearch) onDelete(obj types.NamespacedName) {
185185
r.dynamicWatches.Secrets.RemoveHandlerForKey(common.ConfigRefWatchName(obj))
186186
// Clean up watches set on custom http tls certificates
187187
r.dynamicWatches.Secrets.RemoveHandlerForKey(certificates.CertificateWatchKey(entName.EntNamer, obj.Name))
188+
189+
if err := reconciler.GarbageCollectSoftOwnedSecrets(r.Client, obj); err != nil {
190+
// this is best-effort only, some secrets may remain orphan in case of error here,
191+
// or if the operator was down during the owner deletion
192+
log.Error(err, "namespace", obj.Namespace, "ent_name", obj.Name,
193+
"Failed to garbage collect secrets, they should be removed manually")
194+
}
188195
}
189196

190197
func (r *ReconcileEnterpriseSearch) isCompatible(ctx context.Context, ent *entv1beta1.EnterpriseSearch) (bool, error) {

pkg/controller/kibana/controller.go

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,6 @@ import (
99
"reflect"
1010
"sync/atomic"
1111

12-
"go.elastic.co/apm"
13-
appsv1 "k8s.io/api/apps/v1"
14-
corev1 "k8s.io/api/core/v1"
15-
apierrors "k8s.io/apimachinery/pkg/api/errors"
16-
"k8s.io/apimachinery/pkg/types"
17-
"k8s.io/client-go/tools/record"
18-
"sigs.k8s.io/controller-runtime/pkg/controller"
19-
"sigs.k8s.io/controller-runtime/pkg/handler"
20-
logf "sigs.k8s.io/controller-runtime/pkg/log"
21-
"sigs.k8s.io/controller-runtime/pkg/manager"
22-
"sigs.k8s.io/controller-runtime/pkg/reconcile"
23-
"sigs.k8s.io/controller-runtime/pkg/source"
24-
2512
kbv1 "github.com/elastic/cloud-on-k8s/pkg/apis/kibana/v1"
2613
"github.com/elastic/cloud-on-k8s/pkg/controller/association"
2714
"github.com/elastic/cloud-on-k8s/pkg/controller/common"
@@ -31,9 +18,22 @@ import (
3118
"github.com/elastic/cloud-on-k8s/pkg/controller/common/finalizer"
3219
"github.com/elastic/cloud-on-k8s/pkg/controller/common/keystore"
3320
"github.com/elastic/cloud-on-k8s/pkg/controller/common/operator"
21+
"github.com/elastic/cloud-on-k8s/pkg/controller/common/reconciler"
3422
"github.com/elastic/cloud-on-k8s/pkg/controller/common/tracing"
3523
"github.com/elastic/cloud-on-k8s/pkg/controller/common/watches"
3624
"github.com/elastic/cloud-on-k8s/pkg/utils/k8s"
25+
"go.elastic.co/apm"
26+
appsv1 "k8s.io/api/apps/v1"
27+
corev1 "k8s.io/api/core/v1"
28+
apierrors "k8s.io/apimachinery/pkg/api/errors"
29+
"k8s.io/apimachinery/pkg/types"
30+
"k8s.io/client-go/tools/record"
31+
"sigs.k8s.io/controller-runtime/pkg/controller"
32+
"sigs.k8s.io/controller-runtime/pkg/handler"
33+
logf "sigs.k8s.io/controller-runtime/pkg/log"
34+
"sigs.k8s.io/controller-runtime/pkg/manager"
35+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
36+
"sigs.k8s.io/controller-runtime/pkg/source"
3737
)
3838

3939
const (
@@ -249,6 +249,12 @@ func (r *ReconcileKibana) onDelete(obj types.NamespacedName) {
249249
r.dynamicWatches.Secrets.RemoveHandlerForKey(keystore.SecureSettingsWatchName(obj))
250250
// Clean up watches set on custom http tls certificates
251251
r.dynamicWatches.Secrets.RemoveHandlerForKey(certificates.CertificateWatchKey(Namer, obj.Name))
252+
if err := reconciler.GarbageCollectSoftOwnedSecrets(r.Client, obj); err != nil {
253+
// this is best-effort only, some secrets may remain orphan in case of error here,
254+
// or if the operator was down during the owner deletion
255+
log.Error(err, "namespace", obj.Namespace, "kb_name", obj.Name,
256+
"Failed to garbage collect secrets, they should be removed manually")
257+
}
252258
}
253259

254260
// State holds the accumulated state during the reconcile loop including the response and a pointer to a Kibana

0 commit comments

Comments
 (0)