package infrastructure

import (
	"errors"
	"fmt"
	"time"

	oclib "cloud.o-forge.io/core/oc-lib"
	"cloud.o-forge.io/core/oc-lib/models/booking/planner"
	"cloud.o-forge.io/core/oc-lib/models/resources"
	"cloud.o-forge.io/core/oc-lib/models/workflow"
	"cloud.o-forge.io/core/oc-lib/tools"
)

// ---------------------------------------------------------------------------
// Slot availability check
// ---------------------------------------------------------------------------

const (
	// checkWindowHours is passed to findNextSlot by Check as the scan window.
	checkWindowHours = 5 // how far ahead to scan for a free slot (hours)
	// checkStepMin is the granularity of the forward scan in findNextSlot.
	checkStepMin = 15 // time increment per scan step (minutes)

	// asapBuffer is the minimum lead time added to time.Now() for as_possible
	// and WHEN_POSSIBLE bookings. It absorbs NATS propagation + p2p stream
	// latency so the ExpectedStartDate never arrives already in the past at
	// the destination peer.
	asapBuffer = 2 * time.Minute
)

// CheckResult holds the outcome of a slot availability check.
type CheckResult struct {
	// Available reports whether the slot can be scheduled. Check sets it to
	// true when no resource is reported unavailable, and always in
	// preemption mode.
	Available bool `json:"available"`
	// Start is the resolved start of the checked slot.
	Start time.Time `json:"start"`
	// End is the resolved end of the checked slot; nil when neither an
	// explicit end, a duration, nor a plan estimate was available.
	End *time.Time `json:"end,omitempty"`
	// NextSlot is the nearest free slot found within checkWindowHours when
	// the requested slot is unavailable, or the preferred (conflict-free) slot
	// when running in preemption mode.
	NextSlot *time.Time `json:"next_slot,omitempty"`
	// Warnings carries the human-readable messages produced while checking
	// each resource (missing planner cache entries, conflicts).
	Warnings []string `json:"warnings,omitempty"`
	// Preemptible is true when the check was run in preemption mode.
	Preemptible bool `json:"preemptible,omitempty"`
	// SchedulingID is the session identifier the client must supply to Schedule
	// in order to confirm the draft bookings created during this Check session.
	SchedulingID string `json:"scheduling_id,omitempty"`
}

// bookingResource is the minimum info needed to verify a resource against the
// planner cache.
type bookingResource struct { id string // resource MongoDB _id peerPID string // peer public PeerID (PID) — PlannerCache key instanceID string // resolved from WorkflowSchedule.SelectedInstances } // Check verifies that all booking-relevant resources (storage and compute) of // the given workflow have capacity for the requested time slot. // // - asap=true → ignore ws.Start, begin searching from time.Now() // - preemption → always return Available=true but populate Warnings with // conflicts and NextSlot with the nearest conflict-free alternative func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) { // 1. Load workflow obj, code, err := workflow.NewAccessor(request).LoadOne(wfID) if code != 200 || err != nil { msg := "could not load workflow " + wfID if err != nil { msg += ": " + err.Error() } return nil, errors.New(msg) } wf := obj.(*workflow.Workflow) // 2. Resolve start start := ws.Start if asap || start.IsZero() { start = time.Now().Add(asapBuffer) } // 3. Resolve end – use explicit end/duration or estimate via Planify end := ws.End if end == nil { if ws.DurationS > 0 { e := start.Add(time.Duration(ws.DurationS * float64(time.Second))) end = &e } else { _, longest, _, _, planErr := wf.Planify( start, nil, ws.SelectedInstances, ws.SelectedPartnerships, ws.SelectedBuyings, ws.SelectedStrategies, int(ws.BookingMode), request, ) if planErr == nil && longest > 0 { e := start.Add(time.Duration(longest) * time.Second) end = &e } } } // 4. Extract booking-relevant (storage + compute) resources from the graph, // resolving the selected instance for each resource. checkables := collectBookingResources(wf, ws.SelectedInstances) // 5. Check every resource against its peer's planner unavailable, warnings := checkResourceAvailability(checkables, start, end) result := &CheckResult{ Start: start, End: end, Warnings: warnings, } // 6. 
Preemption mode: mark as schedulable regardless of conflicts, but // surface warnings and the nearest conflict-free alternative. if preemption { result.Available = true result.Preemptible = true if len(unavailable) > 0 { result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours) } return result, nil } // 7. All resources are free if len(unavailable) == 0 { result.Available = true return result, nil } // 8. Slot unavailable – locate the nearest free slot within the window result.Available = false result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours) return result, nil } // collectBookingResources returns unique storage and compute resources from the // workflow graph. For each resource the selected instance ID is resolved from // selectedInstances (the scheduler's SelectedInstances ConfigItem) so the planner // check targets the exact instance chosen by the user. func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) map[string]bookingResource { if wf.Graph == nil { return nil } seen := map[string]bool{} result := map[string]bookingResource{} // Resolve MongoDB peer _id (DID) → public PeerID (PID) used as PlannerCache key. 
peerAccess := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil) didToPID := map[string]string{} resolvePID := func(did string) string { if pid, ok := didToPID[did]; ok { return pid } if data := peerAccess.LoadOne(did); data.Data != nil { if p := data.ToPeer(); p != nil { didToPID[did] = p.PeerID return p.PeerID } } return "" } resolveInstanceID := func(res interface { GetID() string GetCreatorID() string }) string { idx := selectedInstances.Get(res.GetID()) switch r := res.(type) { case *resources.StorageResource: if inst := r.GetSelectedInstance(idx); inst != nil { return inst.GetID() } case *resources.ComputeResource: if inst := r.GetSelectedInstance(idx); inst != nil { return inst.GetID() } } return "" } for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) { i := item _, res := i.GetResource() if res == nil { continue } id := res.GetID() if seen[id] { continue } pid := resolvePID(res.GetCreatorID()) if pid == "" { continue } seen[id] = true result[pid] = bookingResource{ id: id, peerPID: pid, instanceID: resolveInstanceID(res), } } for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) { i := item _, res := i.GetResource() if res == nil { continue } id := res.GetID() if seen[id] { continue } pid := resolvePID(res.GetCreatorID()) if pid == "" { continue } seen[id] = true result[pid] = bookingResource{ id: id, peerPID: pid, instanceID: resolveInstanceID(res), } } return result } // checkResourceAvailability returns the IDs of unavailable resources and // human-readable warning messages. 
func checkResourceAvailability(res map[string]bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) { for _, r := range res { plannerMu.RLock() entry := PlannerCache[r.peerPID] plannerMu.RUnlock() if entry == nil || entry.Planner == nil { warnings = append(warnings, fmt.Sprintf( "peer %s planner not in cache for resource %s – assuming available", r.peerPID, r.id)) continue } if !checkInstance(entry.Planner, r.id, r.instanceID, start, end) { unavailable = append(unavailable, r.id) warnings = append(warnings, fmt.Sprintf( "resource %s is not available in [%s – %s]", r.id, start.Format(time.RFC3339), formatOptTime(end))) } } return } // checkInstance checks availability for the specific instance resolved by the // scheduler. When instanceID is empty (no instance selected / none resolvable), // it falls back to checking all instances known in the planner and returns true // if any one has remaining capacity. Returns true when no capacity is recorded. func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool { if instanceID != "" { return p.Check(resourceID, instanceID, nil, start, end) } // Fallback: accept if any known instance has free capacity caps, ok := p.Capacities[resourceID] if !ok || len(caps) == 0 { return true // no recorded usage → assume free } for id := range caps { if p.Check(resourceID, id, nil, start, end) { return true } } return false } // findNextSlot scans forward from 'from' in checkStepMin increments for up to // windowH hours and returns the first candidate start time at which all // resources are simultaneously free. 
func findNextSlot(resources map[string]bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time { duration := time.Hour if originalEnd != nil { if d := originalEnd.Sub(from); d > 0 { duration = d } } step := time.Duration(checkStepMin) * time.Minute limit := from.Add(time.Duration(windowH) * time.Hour) for t := from.Add(step); t.Before(limit); t = t.Add(step) { e := t.Add(duration) if unavail, _ := checkResourceAvailability(resources, t, &e); len(unavail) == 0 { return &t } } return nil } func formatOptTime(t *time.Time) string { if t == nil { return "open" } return t.Format(time.RFC3339) } // GetWorkflowPeerIDs loads the workflow and returns the deduplicated list of // creator peer IDs for all its storage and compute resources. // These are the peers whose planners must be watched by a check stream. func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) { obj, code, err := workflow.NewAccessor(request).LoadOne(wfID) if code != 200 || err != nil { msg := "could not load workflow " + wfID if err != nil { msg += ": " + err.Error() } return nil, errors.New(msg) } wf := obj.(*workflow.Workflow) if wf.Graph == nil { return nil, nil } seen := map[string]bool{} var peerIDs []string for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) { i := item _, res := i.GetResource() if res == nil { continue } if id := res.GetCreatorID(); id != "" && !seen[id] { seen[id] = true peerIDs = append(peerIDs, id) } } for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) { i := item _, res := i.GetResource() if res == nil { continue } if id := res.GetCreatorID(); id != "" && !seen[id] { seen[id] = true peerIDs = append(peerIDs, id) } } realPeersID := []string{} access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil) for _, id := range peerIDs { if data := access.LoadOne(id); data.Data != nil { realPeersID = append(realPeersID, data.ToPeer().PeerID) } } return realPeersID, nil }