package kubernetes import ( "context" "fmt" "regexp" "strings" "time" "oc-datacenter/conf" "oc-datacenter/infrastructure" "oc-datacenter/infrastructure/admiralty" "oc-datacenter/infrastructure/storage" oclib "cloud.o-forge.io/core/oc-lib" "cloud.o-forge.io/core/oc-lib/dbs" bookingmodel "cloud.o-forge.io/core/oc-lib/models/booking" "cloud.o-forge.io/core/oc-lib/models/workflow_execution" "cloud.o-forge.io/core/oc-lib/tools" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // uuidNsPattern matches Kubernetes namespace names that are execution UUIDs. var uuidNsPattern = regexp.MustCompile(`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`) // WatchInfra is a safety-net watchdog that periodically scans Kubernetes for // execution namespaces whose WorkflowExecution has reached a terminal state // but whose infra was never torn down (e.g. because WORKFLOW_DONE_EVENT was // missed due to oc-monitord or oc-datacenter crash/restart). // // Must be launched in a goroutine from main. func (s *KubernetesService) Watch() { logger := oclib.GetLogger() logger.Info().Msg("InfraWatchdog: started") ticker := time.NewTicker(5 * time.Minute) defer ticker.Stop() for range ticker.C { if err := s.scanOrphaned(); err != nil { logger.Error().Msg("InfraWatchdog: " + err.Error()) } if err := s.scanOrphanedMinio(); err != nil { logger.Error().Msg("InfraWatchdog(minio): " + err.Error()) } if err := s.scanOrphanedAdmiraltyNodes(); err != nil { logger.Error().Msg("InfraWatchdog(admiralty-nodes): " + err.Error()) } if err := s.scanOrphanedPVC(); err != nil { logger.Error().Msg("InfraWatchdog(pvc): " + err.Error()) } } } // scanOrphanedInfra lists all UUID-named Kubernetes namespaces, looks up their // WorkflowExecution in the DB, and triggers teardown for any that are in a // terminal state. Namespaces already in Terminating phase are skipped. 
func (s *KubernetesService) scanOrphaned() error { logger := oclib.GetLogger() serv, err := tools.NewKubernetesService( conf.GetConfig().KubeHost+":"+conf.GetConfig().KubePort, conf.GetConfig().KubeCA, conf.GetConfig().KubeCert, conf.GetConfig().KubeData, ) if err != nil { return fmt.Errorf("failed to init k8s service: %w", err) } ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() nsList, err := serv.Set.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) if err != nil { return fmt.Errorf("failed to list namespaces: %w", err) } myself, err := oclib.GetMySelf() if err != nil { return fmt.Errorf("could not resolve local peer: %w", err) } peerID := myself.GetID() for _, ns := range nsList.Items { executionsID := ns.Name if !uuidNsPattern.MatchString(executionsID) { continue } // Skip namespaces already being deleted by a previous teardown. if ns.Status.Phase == v1.NamespaceTerminating { continue } exec := findTerminalExecution(executionsID, peerID) if exec == nil { continue } logger.Info().Msgf("InfraWatchdog: orphaned infra detected for execution %s (state=%v) → teardown", executionsID, exec.State) go s.TeardownForExecution(exec.GetID()) } return nil } // scanOrphanedMinio scans LIVE_STORAGE bookings for executions that are in a // terminal state and triggers Minio teardown for each unique executionsID found. // This covers the case where the Kubernetes namespace is already gone (manual // deletion, prior partial teardown) but Minio SA and bucket were never revoked. func (s *KubernetesService) scanOrphanedMinio() error { logger := oclib.GetLogger() myself, err := oclib.GetMySelf() if err != nil { return fmt.Errorf("could not resolve local peer: %w", err) } peerID := myself.GetID() res := oclib.NewRequest(oclib.LibDataEnum(oclib.BOOKING), "", peerID, []string{}, nil). 
Search(&dbs.Filters{ And: map[string][]dbs.Filter{ "resource_type": {{Operator: dbs.EQUAL.String(), Value: tools.LIVE_STORAGE.EnumIndex()}}, }, }, "", false) if res.Err != "" { return fmt.Errorf("failed to search LIVE_STORAGE bookings: %s", res.Err) } // Collect unique executionsIDs to avoid redundant teardowns. seen := map[string]bool{} ctx := context.Background() for _, dbo := range res.Data { b, ok := dbo.(*bookingmodel.Booking) if !ok || seen[b.ExecutionsID] { continue } exec := findTerminalExecution(b.ExecutionsID, peerID) if exec == nil { continue } seen[b.ExecutionsID] = true minio := storage.NewMinioSetter(b.ExecutionsID, b.ResourceID) // Determine this peer's role and call the appropriate teardown. if b.DestPeerID == peerID { logger.Info().Msgf("InfraWatchdog(minio): orphaned target resources for exec %s → TeardownAsTarget", b.ExecutionsID) event := storage.MinioDeleteEvent{ ExecutionsID: b.ExecutionsID, MinioID: b.ResourceID, SourcePeerID: b.DestPeerID, DestPeerID: peerID, } go minio.TeardownAsTarget(ctx, event) } else { logger.Info().Msgf("InfraWatchdog(minio): orphaned source resources for exec %s → TeardownAsSource", b.ExecutionsID) event := storage.MinioDeleteEvent{ ExecutionsID: b.ExecutionsID, MinioID: b.ResourceID, SourcePeerID: peerID, DestPeerID: b.DestPeerID, } go minio.TeardownAsSource(ctx, event) } } return nil } // scanOrphanedAdmiraltyNodes lists all Kubernetes nodes, identifies Admiralty // virtual nodes (name prefix "admiralty-{UUID}-") that are NotReady, and // explicitly deletes them when their WorkflowExecution is in a terminal state. // // This covers the gap where the namespace is already gone (or Terminating) but // the virtual node was never cleaned up by the Admiralty controller — which can // happen when the node goes NotReady before the AdmiraltyTarget CRD is deleted. 
func (s *KubernetesService) scanOrphanedAdmiraltyNodes() error { logger := oclib.GetLogger() serv, err := tools.NewKubernetesService( conf.GetConfig().KubeHost+":"+conf.GetConfig().KubePort, conf.GetConfig().KubeCA, conf.GetConfig().KubeCert, conf.GetConfig().KubeData, ) if err != nil { return fmt.Errorf("failed to init k8s service: %w", err) } ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() nodeList, err := serv.Set.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) if err != nil { return fmt.Errorf("failed to list nodes: %w", err) } myself, err := oclib.GetMySelf() if err != nil { return fmt.Errorf("could not resolve local peer: %w", err) } peerID := myself.GetID() for _, node := range nodeList.Items { // Admiralty virtual nodes are named: admiralty-{executionID}-target-{...} rest := strings.TrimPrefix(node.Name, "admiralty-") if rest == node.Name { continue // not an admiralty node } // UUID is exactly 36 chars: 8-4-4-4-12 if len(rest) < 36 { continue } executionsID := rest[:36] if !uuidNsPattern.MatchString(executionsID) { continue } // Only act on NotReady nodes. ready := false for _, cond := range node.Status.Conditions { if cond.Type == v1.NodeReady { ready = cond.Status == v1.ConditionTrue break } } if ready { continue } exec := findTerminalExecution(executionsID, peerID) if exec == nil { continue } logger.Info().Msgf("InfraWatchdog(admiralty-nodes): NotReady orphaned node %s for terminal execution %s → deleting", node.Name, executionsID) if delErr := serv.Set.CoreV1().Nodes().Delete(ctx, node.Name, metav1.DeleteOptions{}); delErr != nil { logger.Error().Msgf("InfraWatchdog(admiralty-nodes): failed to delete node %s: %v", node.Name, delErr) } } return nil } // scanOrphanedPVC scans LIVE_STORAGE bookings for executions that are in a // terminal state and triggers PVC teardown for each one where this peer holds // the local storage. 
This covers the case where the Kubernetes namespace was // already deleted (or its teardown was partial) but the PersistentVolume // (cluster-scoped) was never reclaimed. // // A LIVE_STORAGE booking is treated as a local PVC only when ResolveStorageName // returns a non-empty name — the same guard used by teardownPVCForExecution. func (s *KubernetesService) scanOrphanedPVC() error { logger := oclib.GetLogger() myself, err := oclib.GetMySelf() if err != nil { return fmt.Errorf("could not resolve local peer: %w", err) } peerID := myself.GetID() res := oclib.NewRequest(oclib.LibDataEnum(oclib.BOOKING), "", peerID, []string{}, nil). Search(&dbs.Filters{ And: map[string][]dbs.Filter{ "resource_type": {{Operator: dbs.EQUAL.String(), Value: tools.LIVE_STORAGE.EnumIndex()}}, }, }, "", false) if res.Err != "" { return fmt.Errorf("failed to search LIVE_STORAGE bookings: %s", res.Err) } seen := map[string]bool{} ctx := context.Background() for _, dbo := range res.Data { b, ok := dbo.(*bookingmodel.Booking) if !ok || seen[b.ExecutionsID+b.ResourceID] { continue } storageName := storage.ResolveStorageName(b.ResourceID, peerID) if storageName == "" { continue // not a local PVC booking } exec := findTerminalExecution(b.ExecutionsID, peerID) if exec == nil { continue } seen[b.ExecutionsID+b.ResourceID] = true logger.Info().Msgf("InfraWatchdog(pvc): orphaned PVC for exec %s storage %s → TeardownAsSource", b.ExecutionsID, b.ResourceID) event := storage.PVCDeleteEvent{ ExecutionsID: b.ExecutionsID, StorageID: b.ResourceID, StorageName: storageName, SourcePeerID: peerID, DestPeerID: b.DestPeerID, } go storage.NewPVCSetter(b.ExecutionsID, b.ResourceID).TeardownAsSource(ctx, event) } return nil } // findTerminalExecution returns the WorkflowExecution for the given executionsID // if it exists in the DB and is in a terminal state, otherwise nil. 
func findTerminalExecution(executionsID string, peerID string) *workflow_execution.WorkflowExecution {
	// Search by the executions_id field (the namespace/UUID identifier),
	// not by the record's primary ID.
	res := oclib.NewRequest(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), "", peerID, []string{}, nil).
		Search(&dbs.Filters{
			And: map[string][]dbs.Filter{
				"executions_id": {{Operator: dbs.EQUAL.String(), Value: executionsID}},
			},
		}, "", false)
	// Not found, or search error — treated the same: no teardown candidate.
	if res.Err != "" || len(res.Data) == 0 {
		return nil
	}
	exec, ok := res.Data[0].(*workflow_execution.WorkflowExecution)
	if !ok {
		return nil
	}
	// Only executions in a closing/terminal state qualify.
	if !infrastructure.ClosingStates[exec.State] {
		return nil
	}
	return exec
}

// TeardownForExecution handles infrastructure cleanup when a workflow
// terminates. oc-datacenter is responsible only for infra here —
// booking/execution state is managed by oc-scheduler.
//
// executionID is the WorkflowExecution record ID used to load the execution.
// NOTE(review): the setters below are built from s.ExecutionsID (the service's
// own executions ID), not from the loaded execution. When this is invoked from
// the watchdog for an arbitrary orphaned execution, s.ExecutionsID may differ
// from the loaded exec's executions ID — confirm whether the setters should
// derive their ID from exec instead.
func (s *KubernetesService) TeardownForExecution(executionID string) {
	logger := oclib.GetLogger()
	myself, err := oclib.GetMySelf()
	if err != nil || myself == nil {
		return
	}
	selfPeerID := myself.GetID()
	// Admin-scoped request so teardown works regardless of caller rights.
	adminReq := &tools.APIRequest{Admin: true}
	res, _, loadErr := workflow_execution.NewAccessor(adminReq).LoadOne(executionID)
	if loadErr != nil || res == nil {
		logger.Warn().Msgf("teardownInfraForExecution: execution %s not found", executionID)
		return
	}
	exec := res.(*workflow_execution.WorkflowExecution)
	ctx := context.Background()
	// Tear down remote Admiralty resources first, then storage (Minio, PVC),
	// and finally clean up cached images.
	admiralty.NewAdmiraltySetter(s.ExecutionsID).TeardownIfRemote(exec, selfPeerID)
	storage.NewMinioSetter(s.ExecutionsID, "").TeardownForExecution(ctx, selfPeerID)
	storage.NewPVCSetter(s.ExecutionsID, "").TeardownForExecution(ctx, selfPeerID)
	s.CleanupImages(ctx)
}