diff --git a/controllers/loki.go b/controllers/loki.go
index 9120038..5dc6f1c 100644
--- a/controllers/loki.go
+++ b/controllers/loki.go
@@ -58,7 +58,6 @@ func (o *LokiController) GetLogs() {
path += "?query={" + strings.Join(query, ", ") + "}&start=" + start + "&end=" + end
resp, err := http.Get(config.GetConfig().LokiUrl + path) // CALL
- fmt.Println(resp, path)
if err != nil {
o.Ctx.ResponseWriter.WriteHeader(422)
o.Data["json"] = map[string]string{"error": err.Error()}
diff --git a/controllers/sheduler.go b/controllers/sheduler.go
new file mode 100644
index 0000000..f185644
--- /dev/null
+++ b/controllers/sheduler.go
@@ -0,0 +1,265 @@
+package controllers
+
+import (
+ "fmt"
+ "net/http"
+ "oc-scheduler/infrastructure"
+ "strings"
+
+ oclib "cloud.o-forge.io/core/oc-lib"
+ "cloud.o-forge.io/core/oc-lib/dbs"
+ "cloud.o-forge.io/core/oc-lib/tools"
+ beego "github.com/beego/beego/v2/server/web"
+ "github.com/google/uuid"
+ gorillaws "github.com/gorilla/websocket"
+)
+
+var orderCollection = oclib.LibDataEnum(oclib.ORDER)
+var logger = oclib.GetLogger()
+
+// Operations about workflow
+type WorkflowSchedulerController struct {
+ beego.Controller
+}
+
+var wsUpgrader = gorillaws.Upgrader{
+ CheckOrigin: func(r *http.Request) bool { return true },
+}
+
+// CheckStreamHandler is the WebSocket handler for slot availability checking.
+// It is invoked via the CheckStream controller method.
+// Query params: as_possible=true, preemption=true
+func CheckStreamHandler(w http.ResponseWriter, r *http.Request) {
+ wfID := strings.TrimSuffix(
+ strings.TrimPrefix(r.URL.Path, "/oc/"),
+ "/check",
+ )
+
+ q := r.URL.Query()
+ asap := q.Get("as_possible") == "true"
+ preemption := q.Get("preemption") == "true"
+
+ user, peerID, groups := oclib.ExtractTokenInfo(*r)
+ req := &tools.APIRequest{
+ Username: user,
+ PeerID: peerID,
+ Groups: groups,
+ Caller: nil,
+ Admin: true,
+ }
+
+ watchedPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req)
+ fmt.Println("Here my watched peers involved in workflow", watchedPeers)
+ if err != nil {
+ http.Error(w, `{"code":404,"error":"`+err.Error()+`"}`, http.StatusNotFound)
+ return
+ }
+
+ conn, err := wsUpgrader.Upgrade(w, r, nil)
+ if err != nil {
+ return
+ }
+
+ var ws infrastructure.WorkflowSchedule
+ if err := conn.ReadJSON(&ws); err != nil {
+ conn.Close()
+ return
+ }
+
+ plannerCh, plannerUnsub := infrastructure.SubscribePlannerUpdates(watchedPeers)
+ wfCh, wfUnsub := infrastructure.SubscribeWorkflowUpdates(wfID)
+
+ executionsID := uuid.New().String()
+ ownedPeers := infrastructure.RequestPlannerRefresh(watchedPeers, executionsID)
+
+ selfID, err := oclib.GetMySelf()
+ if err != nil || selfID == nil {
+ logger.Err(err).Msg(err.Error())
+ return
+ }
+ selfPeerID := ""
+ if selfID != nil {
+ selfPeerID = selfID.PeerID
+ }
+
+ // scheduled=true once bookings/purchases/exec have been created for this session.
+ scheduled := false
+ confirmed := false
+
+ defer func() {
+ conn.Close()
+ plannerUnsub()
+ wfUnsub()
+
+ infrastructure.ReleaseRefreshOwnership(ownedPeers, executionsID)
+ if !confirmed {
+ infrastructure.CleanupSession(selfID, executionsID, selfID, req)
+ }
+ }()
+
+ // pushCheck runs an availability check and sends the result to the client.
+ // If reschedule=true and the slot is available, it also creates/updates
+ // bookings, purchases and the execution draft for this session.
+ pushCheck := func(reschedule bool) error {
+ result, checkErr := ws.Check(wfID, asap, preemption, req)
+ if checkErr != nil {
+ return checkErr
+ }
+ if result.Available && reschedule {
+ // Sync the resolved start/end back to ws so that UpsertSessionDrafts
+ // creates bookings/purchases with the actual scheduled dates (not the
+ // raw client value which may be zero or pre-asapBuffer).
+ ws.Start = result.Start
+ if result.End != nil {
+ ws.End = result.End
+ }
+ ws.UpsertSessionDrafts(wfID, executionsID, selfID, req)
+ scheduled = true
+ }
+ result.SchedulingID = executionsID
+ return conn.WriteJSON(result)
+ }
+
+ // Initial check + schedule.
+ if err := pushCheck(true); err != nil {
+ return
+ }
+
+ updateCh := make(chan infrastructure.WorkflowSchedule, 1)
+ closeCh := make(chan struct{})
+ go func() {
+ defer close(closeCh)
+ for {
+ var updated infrastructure.WorkflowSchedule
+ if err := conn.ReadJSON(&updated); err != nil {
+ return
+ }
+ select {
+ case updateCh <- updated:
+ default:
+ <-updateCh
+ updateCh <- updated
+ }
+ }
+ }()
+
+ for {
+ select {
+ case updated := <-updateCh:
+ if updated.Confirm {
+ // Confirm: flip bookings/purchases to IsDraft=false, then let
+ // the considers mechanism transition exec to IsDraft=false.
+ ws.UUID = executionsID
+ _, _, _, schedErr := ws.Schedules(wfID, req)
+ if schedErr != nil {
+ _ = conn.WriteJSON(map[string]interface{}{
+ "error": schedErr.Error(),
+ })
+ return
+ }
+ confirmed = true
+ return
+ }
+ infrastructure.CleanupSession(selfID, executionsID, selfID, req)
+ // Detect whether the user changed dates or instances.
+ datesChanged := !updated.Start.Equal(ws.Start) ||
+ updated.DurationS != ws.DurationS ||
+ (updated.End == nil) != (ws.End == nil) ||
+ (updated.End != nil && ws.End != nil && !updated.End.Equal(*ws.End))
+ ws = updated
+ // Reschedule when dates changed or we haven't scheduled yet.
+ if err := pushCheck(datesChanged || !scheduled); err != nil {
+ return
+ }
+
+ case remotePeerID := <-plannerCh:
+ if remotePeerID == selfPeerID {
+ // Our own planner updated (caused by our local booking store).
+ // Just resend the current availability result without rescheduling
+ // to avoid an infinite loop.
+ result, checkErr := ws.Check(wfID, asap, preemption, req)
+ if checkErr == nil {
+ result.SchedulingID = executionsID
+ _ = conn.WriteJSON(result)
+ }
+ continue
+ }
+ // A remote peer's planner changed. Re-check; if our slot is now
+ // taken and we were already scheduled, reschedule at the new slot.
+ result, checkErr := ws.Check(wfID, asap, preemption, req)
+ if checkErr != nil {
+ return
+ }
+ if !result.Available && scheduled {
+ // Move to the next free slot and reschedule.
+ if result.NextSlot != nil {
+ ws.Start = *result.NextSlot
+ }
+ if err := pushCheck(true); err != nil {
+ return
+ }
+ } else {
+ result.SchedulingID = executionsID
+ _ = conn.WriteJSON(result)
+ }
+
+ case <-wfCh:
+ if newPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req); err == nil {
+ plannerUnsub()
+ watchedPeers = newPeers
+ plannerCh, plannerUnsub = infrastructure.SubscribePlannerUpdates(newPeers)
+ newOwned := infrastructure.RequestPlannerRefresh(newPeers, executionsID)
+ ownedPeers = append(ownedPeers, newOwned...)
+ }
+ if err := pushCheck(false); err != nil {
+ return
+ }
+
+ case <-closeCh:
+ return
+ }
+ }
+}
+
+// @Title UnSchedule
+// @Description unschedule a workflow execution: deletes its bookings on all peers then deletes the execution.
+// @Param id path string true "execution id"
+// @Success 200 {object} map[string]interface{}
+// @router /:id [delete]
+func (o *WorkflowSchedulerController) UnSchedule() {
+ user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
+ executionID := o.Ctx.Input.Param(":id")
+ req := &tools.APIRequest{
+ Username: user,
+ PeerID: peerID,
+ Groups: groups,
+ Admin: true,
+ }
+ selfID, _ := oclib.GetMySelf()
+ if err := infrastructure.UnscheduleExecution(executionID, selfID, req); err != nil {
+ o.Data["json"] = map[string]interface{}{"code": 404, "error": err.Error()}
+ } else {
+ o.Data["json"] = map[string]interface{}{"code": 200, "error": ""}
+ }
+ o.ServeJSON()
+}
+
+// @Title SearchScheduledDraftOrder
+// @Description search the scheduled draft orders attached to a workflow
+// @Param id path string true "workflow id"
+// @Success 200 {workspace} models.workspace
+// @router /:id/order [get]
+func (o *WorkflowSchedulerController) SearchScheduledDraftOrder() {
+ _, peerID, _ := oclib.ExtractTokenInfo(*o.Ctx.Request)
+ id := o.Ctx.Input.Param(":id")
+ filter := &dbs.Filters{
+ And: map[string][]dbs.Filter{
+ "workflow_id": {{Operator: dbs.EQUAL.String(), Value: id}},
+ "order_by": {{Operator: dbs.EQUAL.String(), Value: peerID}},
+ },
+ }
+ o.Data["json"] = oclib.NewRequestAdmin(orderCollection, nil).Search(filter, "", true)
+
+ //o.Data["json"] = oclib.NewRequest(orderCollection, user, peerID, groups, nil).Search(filter, "", true)
+ o.ServeJSON()
+}
diff --git a/controllers/workflow_sheduler.go b/controllers/workflow_sheduler.go
deleted file mode 100644
index dd469b3..0000000
--- a/controllers/workflow_sheduler.go
+++ /dev/null
@@ -1,270 +0,0 @@
-package controllers
-
-import (
- "encoding/json"
- "fmt"
- "net/http"
- "oc-scheduler/infrastructure"
- "strings"
-
- oclib "cloud.o-forge.io/core/oc-lib"
- "cloud.o-forge.io/core/oc-lib/dbs"
- "cloud.o-forge.io/core/oc-lib/tools"
- beego "github.com/beego/beego/v2/server/web"
- "github.com/google/uuid"
- gorillaws "github.com/gorilla/websocket"
-)
-
-var orderCollection = oclib.LibDataEnum(oclib.ORDER)
-var logger = oclib.GetLogger()
-
-// Operations about workflow
-type WorkflowSchedulerController struct {
- beego.Controller
-}
-
-// @Title Schedule
-// @Description schedule workflow
-// @Param id path string true "id execution"
-// @Param body body models.compute true "The compute content"
-// @Success 200 {workspace} models.workspace
-// @router /:id [post]
-func (o *WorkflowSchedulerController) Schedule() {
- logger := oclib.GetLogger()
-
- code := 200
- e := ""
- user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
- wfId := o.Ctx.Input.Param(":id")
- var resp *infrastructure.WorkflowSchedule
- json.Unmarshal(o.Ctx.Input.CopyBody(100000), &resp)
-
- logger.Info().Msg("Booking for " + wfId)
- req := oclib.NewRequestAdmin(collection, nil)
- // req := oclib.NewRequest(collection, user, peerID, groups, caller)
- resp.UUID = uuid.New().String()
- fmt.Println(user, peerID, groups)
- sch, _, _, err := resp.Schedules(wfId, &tools.APIRequest{
- Username: user,
- PeerID: peerID,
- Groups: groups,
- Caller: nil,
- Admin: true,
- })
- if err != nil {
- if sch != nil {
- for _, w := range sch.WorkflowExecution {
- req.DeleteOne(w.GetID())
- }
- }
- o.Data["json"] = map[string]interface{}{
- "data": nil,
- "code": 409,
- "error": "Error when scheduling your execution(s): " + err.Error(),
- }
- o.ServeJSON()
- return
- }
- o.Data["json"] = map[string]interface{}{
- "data": sch.WorkflowExecution,
- "code": code,
- "error": e,
- }
- o.ServeJSON()
-}
-
-var wsUpgrader = gorillaws.Upgrader{
- CheckOrigin: func(r *http.Request) bool { return true },
-}
-
-// @Title CheckStream
-// @Description WebSocket stream for slot availability checking.
-// @Param id path string true "workflow id"
-// @Param as_possible query bool false "search from now"
-// @Param preemption query bool false "force-valid, surface warnings"
-// @router /:id/check [get]
-func (o *WorkflowSchedulerController) CheckStream() {
- CheckStreamHandler(o.Ctx.ResponseWriter, o.Ctx.Request)
-}
-
-// CheckStreamHandler is the WebSocket handler for slot availability checking.
-// It is invoked via the CheckStream controller method.
-// Query params: as_possible=true, preemption=true
-func CheckStreamHandler(w http.ResponseWriter, r *http.Request) {
- wfID := strings.TrimSuffix(
- strings.TrimPrefix(r.URL.Path, "/oc/"),
- "/check",
- )
-
- q := r.URL.Query()
- asap := q.Get("as_possible") == "true"
- preemption := q.Get("preemption") == "true"
-
- user, peerID, groups := oclib.ExtractTokenInfo(*r)
- req := &tools.APIRequest{
- Username: user,
- PeerID: peerID,
- Groups: groups,
- Caller: nil,
- Admin: true,
- }
-
- // Resolve the peer IDs concerned by this workflow before upgrading so we
- // can abort cleanly with a plain HTTP error if the workflow is not found.
- watchedPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req)
- fmt.Println("Here my watched peers involved in workflow", watchedPeers)
- if err != nil {
- http.Error(w, `{"code":404,"error":"`+err.Error()+`"}`, http.StatusNotFound)
- return
- }
-
- // Upgrade to WebSocket.
- conn, err := wsUpgrader.Upgrade(w, r, nil)
- if err != nil {
- // gorilla already wrote the error response
- return
- }
-
- // Read the schedule parameters sent by the client as the first message.
- var ws infrastructure.WorkflowSchedule
- if err := conn.ReadJSON(&ws); err != nil {
- conn.Close()
- return
- }
-
- // Subscribe to planner updates for the initially resolved peers and to
- // workflow change notifications (peer list may change on workflow edit).
- plannerCh, plannerUnsub := infrastructure.SubscribePlannerUpdates(watchedPeers)
- wfCh, wfUnsub := infrastructure.SubscribeWorkflowUpdates(wfID)
-
- // Unique ID for this check session — used to track refresh ownership.
- sessionID := uuid.New().String()
-
- // Request a fresh planner snapshot for every concerned peer.
- // The first session to claim a peer becomes its refresh owner; others skip
- // the duplicate PB_PLANNER emission. ownedPeers grows if the workflow's
- // peer list changes (wfCh).
- ownedPeers := infrastructure.RequestPlannerRefresh(watchedPeers, sessionID)
-
- // Cleanup on exit (clean or forced): release refresh ownership for the
- // peers this session claimed, which resets Refreshing state and emits
- // PB_CLOSE_PLANNER so oc-discovery stops the planner stream.
- defer func() {
- conn.Close()
- plannerUnsub()
- wfUnsub()
- infrastructure.ReleaseRefreshOwnership(ownedPeers, sessionID)
- }()
-
- push := func() error {
- result, checkErr := ws.Check(wfID, asap, preemption, req)
- fmt.Println(result, checkErr)
- if checkErr != nil {
- return checkErr
- }
- return conn.WriteJSON(result)
- }
-
- // Initial check.
- if err := push(); err != nil {
- return
- }
-
- // Read loop: detect client-side close and parse schedule parameter
- // updates (date changes, booking mode changes, …) sent by the client.
- updateCh := make(chan infrastructure.WorkflowSchedule, 1)
- closeCh := make(chan struct{})
- go func() {
- defer close(closeCh)
- for {
- var updated infrastructure.WorkflowSchedule
- if err := conn.ReadJSON(&updated); err != nil {
- // Connection closed or unrecoverable read error.
- return
- }
- // Drop the oldest pending update if the consumer hasn't caught up.
- select {
- case updateCh <- updated:
- default:
- <-updateCh
- updateCh <- updated
- }
- }
- }()
-
- // Stream loop.
- for {
- select {
- case updated := <-updateCh:
- // The client changed the requested date/params: adopt the new
- // schedule and re-run the check immediately.
- ws = updated
- if err := push(); err != nil {
- return
- }
- case <-wfCh:
- // The workflow was modified: refresh the peer list and re-subscribe
- // so the stream watches the correct set of planners going forward.
- if newPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req); err == nil {
- plannerUnsub()
- watchedPeers = newPeers
- plannerCh, plannerUnsub = infrastructure.SubscribePlannerUpdates(newPeers)
- // Claim refresh ownership for any newly added peers.
- newOwned := infrastructure.RequestPlannerRefresh(newPeers, sessionID)
- ownedPeers = append(ownedPeers, newOwned...)
- }
- if err := push(); err != nil {
- return
- }
- case <-plannerCh:
- // A planner snapshot arrived (or was evicted): re-evaluate.
- if err := push(); err != nil {
- return
- }
- case <-closeCh:
- return
- }
- }
-}
-
-// @Title UnSchedule
-// @Description schedule workflow
-// @Param id path string true "id execution"
-// @Param body body models.compute true "The compute content"
-// @Success 200 {workspace} models.workspace
-// @router /:id [delete]
-func (o *WorkflowSchedulerController) UnSchedule() {
- // user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
- id := o.Ctx.Input.Param(":id")
- // TODO UNSCHEDULER
- filter := &dbs.Filters{
- And: map[string][]dbs.Filter{
- "workflow_id": {{Operator: dbs.EQUAL.String(), Value: id}},
- },
- }
- o.Data["json"] = oclib.NewRequestAdmin(collection, nil).Search(filter, "", true)
-
- // o.Data["json"] = oclib.NewRequest(collection, user, peerID, groups, nil).Search(filter, "", true)
- o.ServeJSON()
-}
-
-// @Title SearchScheduledDraftOrder
-// @Description schedule workflow
-// @Param id path string true "id execution"
-// @Success 200 {workspace} models.workspace
-// @router /:id/order [get]
-func (o *WorkflowSchedulerController) SearchScheduledDraftOrder() {
- user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
- id := o.Ctx.Input.Param(":id")
- fmt.Println(user, peerID, groups)
- filter := &dbs.Filters{
- And: map[string][]dbs.Filter{
- "workflow_id": {{Operator: dbs.EQUAL.String(), Value: id}},
- "order_by": {{Operator: dbs.EQUAL.String(), Value: peerID}},
- },
- }
- o.Data["json"] = oclib.NewRequestAdmin(orderCollection, nil).Search(filter, "", true)
-
- //o.Data["json"] = oclib.NewRequest(orderCollection, user, peerID, groups, nil).Search(filter, "", true)
- o.ServeJSON()
-}
diff --git a/go.mod b/go.mod
index ac0c67a..642e0e6 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module oc-scheduler
go 1.25.0
require (
- cloud.o-forge.io/core/oc-lib v0.0.0-20260312105633-a30173921f67
+ cloud.o-forge.io/core/oc-lib v0.0.0-20260317090440-1ac735cef10e
github.com/beego/beego/v2 v2.3.8
github.com/google/uuid v1.6.0
github.com/robfig/cron v1.2.0
@@ -61,7 +61,6 @@ require (
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/klauspost/compress v1.18.0 // indirect
- github.com/kr/text v0.2.0 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/libp2p/go-libp2p/core v0.43.0-rc2 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
@@ -69,7 +68,7 @@ require (
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/montanaflynn/stats v0.7.1 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
- github.com/nats-io/nats.go v1.44.0 // indirect
+ github.com/nats-io/nats.go v1.44.0
github.com/nats-io/nkeys v0.4.11 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
@@ -77,7 +76,6 @@ require (
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.65.0 // indirect
github.com/prometheus/procfs v0.17.0 // indirect
- github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/rs/zerolog v1.34.0 // indirect
github.com/shiena/ansicolor v0.0.0-20230509054315-a9deabde6e02 // indirect
github.com/smartystreets/assertions v1.2.0 // indirect
diff --git a/go.sum b/go.sum
index dccec6e..6a00bee 100644
--- a/go.sum
+++ b/go.sum
@@ -1,34 +1,36 @@
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223141827-5d32b4646a86 h1:/7XYbCzzo062lYbyBM3MA7KLrJII9iCQzvw4T5g/4oY=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223141827-5d32b4646a86/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223142248-b08bbf51ddc5 h1:qxLz4rrFxB1dmJa0/Q6AWBwQgmVt7LVXB0RgwpGYeXE=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223142248-b08bbf51ddc5/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223144148-f28e2c362020 h1:F7Ifw3WgtCnDur1p5+EuFZrM9yy7KSWoIyDQ8opQE90=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223144148-f28e2c362020/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223145010-e10bb5545561 h1:q5m2UMsEgrfN0OJsoa4Sme0v4OO1pnIt8OsAwdL+5/A=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223145010-e10bb5545561/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223145640-e039fa56b64c h1:3PRvQdSSGjmw+Txkf0zWs3F+V9URq22zQCLR3o7bNBY=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223145640-e039fa56b64c/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223162637-ff830065ec27 h1:cw3R1/Ivlr3W1XZ2cCHRrLB6UG/3dhdvG0i+P5W1tYc=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260223162637-ff830065ec27/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260224092928-54aef164ba10 h1:9i8fDtGjg3JDniCO7VGtkd8zHXWze7OJ3tvO4mZnBmY=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260224092928-54aef164ba10/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260224093610-a9ebad78f3a8 h1:xoC5PAz1469QxrNm8rrsq5+BtwshEt+L2Nhf90MrqrM=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260224093610-a9ebad78f3a8/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa h1:1wCpI4dwN1pj6MlpJ7/WifhHVHmCE4RU+9klwqgo/bk=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260311072518-933b7147e908 h1:1jz3xI/u2FzCG8phY7ShqADrmCj0mlrdjbdNUosSwgs=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260311072518-933b7147e908/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260311084029-97bfb0582a99 h1:60BGJeR9uvpDwvNeWqVBnB2JjWLOZv16sUGZjzXSQlg=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260311084029-97bfb0582a99/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312073634-2c9c42dd516a h1:oCkb9l/Cvn0x6iicxIydrjfCNU+UHhKuklFgfzDa174=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312073634-2c9c42dd516a/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312083310-f5e199132416 h1:QHR5pzCI/HUawu8pst5Ggio6WPCUUf8XYjNMVk8kSqo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312083310-f5e199132416/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312104524-e28b79ac0d62 h1:sHzacZxPIKHyjL4EkgG/c7MI8gM1xmLdhaoUx2ZsH+M=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312104524-e28b79ac0d62/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312105633-a30173921f67 h1:x6klvxiRpU1KcvmygIcHGDHFW3CbWC05El6Fryvr3uo=
-cloud.o-forge.io/core/oc-lib v0.0.0-20260312105633-a30173921f67/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260112144037-c35b06e0bc3c h1:9lXrj1agE1clFfxOXRrVXi4PEvlAuWKb4z977c2uk4k=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260112144037-c35b06e0bc3c/go.mod h1:vHWauJsS6ryf7UDqq8hRXoYD5RsONxcFTxeZPOztEuI=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316075231-465b91fd6ecb h1:yO8KQpNHYIv4O6LrkRacFsTQrLv5qYYeO8KD1e1eunA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316075231-465b91fd6ecb/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316082848-9e5102893f8a h1:4HHebXbTSzkD1MG/1GU5kZx45xx9IQ0sibndPuarlp0=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316082848-9e5102893f8a/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316094939-48f034316b91 h1:wm4oeR1mQE1esHAte9dYB8HC+pjY+G7zwfgQUxPO5g8=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316094939-48f034316b91/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316095931-a86e78841b34 h1:OxxfSNhdkqX165YzurzicnrU55s6n4pZjOg+HmkDzUc=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316095931-a86e78841b34/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316104105-deb819c5af95 h1:efOmy48+aw8vGGqHHUfrxVQJq0TlIux0/4aDcH7Wcpw=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316104105-deb819c5af95/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316104558-4a076ba23738 h1:L/xd9d1MCyhwQLwiuaAzID7pRUnotikGSe7jhSqtqPs=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316104558-4a076ba23738/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316104751-40a986af41b8 h1:02FkLYGjbGp/gq8Ej31KIXwF8QaQzJG/IODQt6GogT8=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316104751-40a986af41b8/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316113239-6d8efd137ac5 h1:NF+TYU0it9cWsrTGngv9KVGgrglMCO522/huR2RJNu0=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316113239-6d8efd137ac5/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316114821-9bf2c566e922 h1:B1DzkKyidaSLC7cdJ3jg+kQR9gU20DlGS+KjI8SmlDg=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316114821-9bf2c566e922/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316121650-a4d81cbb67f4 h1:k/xjsnRPIQjoaXp59x0CdwncpJa8KV7Fiyf78fgx7Ao=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316121650-a4d81cbb67f4/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316145919-b110cbc260c9 h1:+Yk9oHpChZhQYce2GY3HnFfW6AdeYAO31kczhwwpKgc=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260316145919-b110cbc260c9/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260317080147-9b2f9451767e h1:tKipQ9WFDJZXgex5MORwI3v0lJsEPaHHeIJqVWA3Vzk=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260317080147-9b2f9451767e/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260317083202-65237f0d1f3f h1:X8ytAjBzEqnFL1YQnjm9Ol/aoCiU/H6IgdzX74ZhFig=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260317083202-65237f0d1f3f/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260317090440-1ac735cef10e h1:e/oYMPAqD27l3Rd473Xny/2Ut/LZnBYXAzfQArNOmrs=
+cloud.o-forge.io/core/oc-lib v0.0.0-20260317090440-1ac735cef10e/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
+github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/beego/beego/v2 v2.3.8 h1:wplhB1pF4TxR+2SS4PUej8eDoH4xGfxuHfS7wAk9VBc=
github.com/beego/beego/v2 v2.3.8/go.mod h1:8vl9+RrXqvodrl9C8yivX1e6le6deCK6RWeq8R7gTTg=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -77,6 +79,8 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4=
github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
@@ -85,14 +89,14 @@ github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7O
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/goraz/onion v0.1.3 h1:KhyvbDA2b70gcz/d5izfwTiOH8SmrvV43AsVzpng3n0=
github.com/goraz/onion v0.1.3/go.mod h1:XEmz1XoBz+wxTgWB8NwuvRm4RAu3vKxvrmYtzK+XCuQ=
-github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
-github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c=
@@ -176,6 +180,10 @@ github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVx
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/ogier/pflag v0.0.1/go.mod h1:zkFki7tvTa0tafRvTBIZTvzYyAu6kQhPZFnshFFPE+g=
+github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
+github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
+github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
+github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/pelletier/go-toml v1.6.0/go.mod h1:5N711Q9dKgbdkxHL+MEfF31hpT7l0S0s/t2kKREewys=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -191,8 +199,7 @@ github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7D
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
github.com/robfig/cron v1.2.0 h1:ZjScXvvxeQ63Dbyxy76Fj3AT3Ut0aKsyd2/tl3DTMuQ=
github.com/robfig/cron v1.2.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k=
-github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
-github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
@@ -208,15 +215,19 @@ github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hg
github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
+github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
+github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
-github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
@@ -239,28 +250,24 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM=
-golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY=
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 h1:bsqhLWFR6G6xiQcb+JoGqdKdRU6WzPWmK8E0jxTjzo4=
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
-golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
-golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -273,8 +280,6 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
-golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -285,8 +290,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
-golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
-golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
@@ -295,9 +298,9 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
-google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/infrastructure/check.go b/infrastructure/check.go
new file mode 100644
index 0000000..446d183
--- /dev/null
+++ b/infrastructure/check.go
@@ -0,0 +1,343 @@
+package infrastructure
+
+import (
+ "errors"
+ "fmt"
+ "time"
+
+ oclib "cloud.o-forge.io/core/oc-lib"
+ "cloud.o-forge.io/core/oc-lib/models/booking/planner"
+ "cloud.o-forge.io/core/oc-lib/models/resources"
+ "cloud.o-forge.io/core/oc-lib/models/workflow"
+ "cloud.o-forge.io/core/oc-lib/tools"
+)
+
+// ---------------------------------------------------------------------------
+// Slot availability check
+// ---------------------------------------------------------------------------
+
const (
	// checkWindowHours × (60/checkStepMin) bounds the number of candidate
	// slots findNextSlot will probe before giving up.
	checkWindowHours = 5  // how far ahead to scan for a free slot (hours)
	checkStepMin     = 15 // time increment per scan step (minutes)
	// asapBuffer is the minimum lead time added to time.Now() for as_possible
	// and WHEN_POSSIBLE bookings. It absorbs NATS propagation + p2p stream
	// latency so the ExpectedStartDate never arrives already in the past at
	// the destination peer.
	asapBuffer = 2 * time.Minute
)
+
// CheckResult holds the outcome of a slot availability check.
type CheckResult struct {
	// Available reports whether the requested slot can be booked as-is.
	// In preemption mode it is always true (see Check step 6).
	Available bool `json:"available"`
	// Start is the resolved slot start; End is nil for open-ended slots.
	Start time.Time  `json:"start"`
	End   *time.Time `json:"end,omitempty"`
	// NextSlot is the nearest free slot found within checkWindowHours when
	// the requested slot is unavailable, or the preferred (conflict-free) slot
	// when running in preemption mode.
	NextSlot *time.Time `json:"next_slot,omitempty"`
	// Warnings carries human-readable conflict / missing-planner messages.
	Warnings []string `json:"warnings,omitempty"`
	// Preemptible is true when the check was run in preemption mode.
	Preemptible bool `json:"preemptible,omitempty"`
	// SchedulingID is the session identifier the client must supply to Schedule
	// in order to confirm the draft bookings created during this Check session.
	SchedulingID string `json:"scheduling_id,omitempty"`
}
+
// bookingResource is the minimum info needed to verify a resource against the
// planner cache.
type bookingResource struct {
	id      string // resource MongoDB _id
	peerPID string // peer public PeerID (PID) — PlannerCache key
	// instanceID is resolved from WorkflowSchedule.SelectedInstances; it may be
	// empty when no instance could be resolved (checkInstance then falls back
	// to scanning all instances known to the planner).
	instanceID string
}
+
// Check verifies that all booking-relevant resources (storage and compute) of
// the given workflow have capacity for the requested time slot.
//
//   - asap=true → ignore ws.Start, begin searching from time.Now()
//   - preemption → always return Available=true but populate Warnings with
//     conflicts and NextSlot with the nearest conflict-free alternative
//
// An error is returned only when the workflow itself cannot be loaded; peers
// whose planners are missing from the cache are reported as warnings by
// checkResourceAvailability instead.
func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) {
	// 1. Load workflow
	obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
	if code != 200 || err != nil {
		msg := "could not load workflow " + wfID
		if err != nil {
			msg += ": " + err.Error()
		}
		return nil, errors.New(msg)
	}
	wf := obj.(*workflow.Workflow)

	// 2. Resolve start. asapBuffer keeps the start from arriving already in
	// the past at the destination peer (see the const block above).
	start := ws.Start
	if asap || start.IsZero() {
		start = time.Now().Add(asapBuffer)
	}

	// 3. Resolve end – use explicit end/duration or estimate via Planify.
	// DurationS is interpreted as seconds (multiplied by time.Second below).
	end := ws.End
	if end == nil {
		if ws.DurationS > 0 {
			e := start.Add(time.Duration(ws.DurationS * float64(time.Second)))
			end = &e
		} else {
			// NOTE(review): a Planify error (or longest <= 0) is deliberately
			// swallowed here and the check proceeds with an open-ended slot
			// (end == nil) — confirm this fallback is intended.
			_, longest, _, _, planErr := wf.Planify(
				start, nil,
				ws.SelectedInstances, ws.SelectedPartnerships,
				ws.SelectedBuyings, ws.SelectedStrategies,
				int(ws.BookingMode), request,
			)
			if planErr == nil && longest > 0 {
				// longest appears to be a duration in seconds — TODO confirm
				// against Planify's contract.
				e := start.Add(time.Duration(longest) * time.Second)
				end = &e
			}
		}
	}

	// 4. Extract booking-relevant (storage + compute) resources from the graph,
	// resolving the selected instance for each resource.
	checkables := collectBookingResources(wf, ws.SelectedInstances)
	// 5. Check every resource against its peer's planner
	unavailable, warnings := checkResourceAvailability(checkables, start, end)
	result := &CheckResult{
		Start:    start,
		End:      end,
		Warnings: warnings,
	}

	// 6. Preemption mode: mark as schedulable regardless of conflicts, but
	// surface warnings and the nearest conflict-free alternative.
	if preemption {
		result.Available = true
		result.Preemptible = true
		if len(unavailable) > 0 {
			result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
		}
		return result, nil
	}

	// 7. All resources are free
	if len(unavailable) == 0 {
		result.Available = true
		return result, nil
	}

	// 8. Slot unavailable – locate the nearest free slot within the window
	result.Available = false
	result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
	return result, nil
}
+
+// collectBookingResources returns unique storage and compute resources from the
+// workflow graph. For each resource the selected instance ID is resolved from
+// selectedInstances (the scheduler's SelectedInstances ConfigItem) so the planner
+// check targets the exact instance chosen by the user.
+func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) map[string]bookingResource {
+ if wf.Graph == nil {
+ return nil
+ }
+ seen := map[string]bool{}
+ result := map[string]bookingResource{}
+
+ // Resolve MongoDB peer _id (DID) → public PeerID (PID) used as PlannerCache key.
+ peerAccess := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
+ didToPID := map[string]string{}
+ resolvePID := func(did string) string {
+ if pid, ok := didToPID[did]; ok {
+ return pid
+ }
+ if data := peerAccess.LoadOne(did); data.Data != nil {
+ if p := data.ToPeer(); p != nil {
+ didToPID[did] = p.PeerID
+ return p.PeerID
+ }
+ }
+ return ""
+ }
+
+ resolveInstanceID := func(res interface {
+ GetID() string
+ GetCreatorID() string
+ }) string {
+ idx := selectedInstances.Get(res.GetID())
+ switch r := res.(type) {
+ case *resources.StorageResource:
+ if inst := r.GetSelectedInstance(idx); inst != nil {
+ return inst.GetID()
+ }
+ case *resources.ComputeResource:
+ if inst := r.GetSelectedInstance(idx); inst != nil {
+ return inst.GetID()
+ }
+ }
+ return ""
+ }
+
+ for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
+ i := item
+ _, res := i.GetResource()
+ if res == nil {
+ continue
+ }
+ id := res.GetID()
+ if seen[id] {
+ continue
+ }
+ pid := resolvePID(res.GetCreatorID())
+ if pid == "" {
+ continue
+ }
+ seen[id] = true
+ result[pid] = bookingResource{
+ id: id,
+ peerPID: pid,
+ instanceID: resolveInstanceID(res),
+ }
+ }
+
+ for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
+ i := item
+ _, res := i.GetResource()
+ if res == nil {
+ continue
+ }
+ id := res.GetID()
+ if seen[id] {
+ continue
+ }
+ pid := resolvePID(res.GetCreatorID())
+ if pid == "" {
+ continue
+ }
+ seen[id] = true
+ result[pid] = bookingResource{
+ id: id,
+ peerPID: pid,
+ instanceID: resolveInstanceID(res),
+ }
+ }
+
+ return result
+}
+
// checkResourceAvailability returns the IDs of unavailable resources and
// human-readable warning messages for the slot [start, end].
// It consults the package-level PlannerCache under plannerMu; a peer whose
// planner snapshot is not cached is optimistically treated as available, with
// a warning emitted so callers can surface the uncertainty.
func checkResourceAvailability(res map[string]bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) {
	for _, r := range res {
		// Grab the cache entry under the read lock only; checkInstance runs
		// outside the lock on the snapshot pointer.
		plannerMu.RLock()
		entry := PlannerCache[r.peerPID]
		plannerMu.RUnlock()
		if entry == nil || entry.Planner == nil {
			warnings = append(warnings, fmt.Sprintf(
				"peer %s planner not in cache for resource %s – assuming available", r.peerPID, r.id))
			continue
		}
		if !checkInstance(entry.Planner, r.id, r.instanceID, start, end) {
			unavailable = append(unavailable, r.id)
			warnings = append(warnings, fmt.Sprintf(
				"resource %s is not available in [%s – %s]",
				r.id, start.Format(time.RFC3339), formatOptTime(end)))
		}
	}
	return
}
+
+// checkInstance checks availability for the specific instance resolved by the
+// scheduler. When instanceID is empty (no instance selected / none resolvable),
+// it falls back to checking all instances known in the planner and returns true
+// if any one has remaining capacity. Returns true when no capacity is recorded.
+func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool {
+ if instanceID != "" {
+ return p.Check(resourceID, instanceID, nil, start, end)
+ }
+ // Fallback: accept if any known instance has free capacity
+ caps, ok := p.Capacities[resourceID]
+ if !ok || len(caps) == 0 {
+ return true // no recorded usage → assume free
+ }
+ for id := range caps {
+ if p.Check(resourceID, id, nil, start, end) {
+ return true
+ }
+ }
+ return false
+}
+
+// findNextSlot scans forward from 'from' in checkStepMin increments for up to
+// windowH hours and returns the first candidate start time at which all
+// resources are simultaneously free.
+func findNextSlot(resources map[string]bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time {
+ duration := time.Hour
+ if originalEnd != nil {
+ if d := originalEnd.Sub(from); d > 0 {
+ duration = d
+ }
+ }
+ step := time.Duration(checkStepMin) * time.Minute
+ limit := from.Add(time.Duration(windowH) * time.Hour)
+ for t := from.Add(step); t.Before(limit); t = t.Add(step) {
+ e := t.Add(duration)
+ if unavail, _ := checkResourceAvailability(resources, t, &e); len(unavail) == 0 {
+ return &t
+ }
+ }
+ return nil
+}
+
// formatOptTime renders an optional time as RFC3339, or "open" when nil
// (used for open-ended booking slots in warning messages).
func formatOptTime(t *time.Time) string {
	if t != nil {
		return t.Format(time.RFC3339)
	}
	return "open"
}
+
+// GetWorkflowPeerIDs loads the workflow and returns the deduplicated list of
+// creator peer IDs for all its storage and compute resources.
+// These are the peers whose planners must be watched by a check stream.
+func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) {
+ obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
+ if code != 200 || err != nil {
+ msg := "could not load workflow " + wfID
+ if err != nil {
+ msg += ": " + err.Error()
+ }
+ return nil, errors.New(msg)
+ }
+ wf := obj.(*workflow.Workflow)
+ if wf.Graph == nil {
+ return nil, nil
+ }
+ seen := map[string]bool{}
+ var peerIDs []string
+ for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
+ i := item
+ _, res := i.GetResource()
+ if res == nil {
+ continue
+ }
+ if id := res.GetCreatorID(); id != "" && !seen[id] {
+ seen[id] = true
+ peerIDs = append(peerIDs, id)
+ }
+ }
+ for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
+ i := item
+ _, res := i.GetResource()
+ if res == nil {
+ continue
+ }
+ if id := res.GetCreatorID(); id != "" && !seen[id] {
+ seen[id] = true
+ peerIDs = append(peerIDs, id)
+ }
+ }
+ realPeersID := []string{}
+ access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
+ for _, id := range peerIDs {
+ if data := access.LoadOne(id); data.Data != nil {
+ realPeersID = append(realPeersID, data.ToPeer().PeerID)
+ }
+ }
+ return realPeersID, nil
+}
diff --git a/infrastructure/considers.go b/infrastructure/considers.go
new file mode 100644
index 0000000..3f90973
--- /dev/null
+++ b/infrastructure/considers.go
@@ -0,0 +1,197 @@
+package infrastructure
+
+import (
+ "encoding/json"
+ "fmt"
+ "sync"
+
+ oclib "cloud.o-forge.io/core/oc-lib"
+ "cloud.o-forge.io/core/oc-lib/models/common/enum"
+ "cloud.o-forge.io/core/oc-lib/models/utils"
+ "cloud.o-forge.io/core/oc-lib/models/workflow"
+ "cloud.o-forge.io/core/oc-lib/models/workflow_execution"
+ "cloud.o-forge.io/core/oc-lib/tools"
+ "oc-scheduler/infrastructure/scheduling"
+)
+
// executionConsidersPayload is the JSON body carried by PB_CONSIDERS
// propagation messages exchanged between the scheduler and its peers.
type executionConsidersPayload struct {
	ID           string   `json:"id"`            // booking / purchase resource ID (or execution ID for WORKFLOW_EXECUTION messages)
	ExecutionsID string   `json:"executions_id"` // copied from WorkflowExecution.ExecutionsID
	ExecutionID  string   `json:"execution_id"`  // target WorkflowExecution ID
	PeerIDs      []string `json:"peer_ids"`      // peers involved in the workflow (see GetWorkflowPeerIDs)
}
+
+// ---------------------------------------------------------------------------
+// Per-execution mutex map (replaces the global stateMu)
+// ---------------------------------------------------------------------------
+
var execLocksMu sync.RWMutex
var execLocks = map[string]*sync.Mutex{} // executionID → per-execution mutex

// RegisterExecLock creates a mutex entry for the execution. Called when a new
// execution draft is persisted.
func RegisterExecLock(executionID string) {
	execLocksMu.Lock()
	defer execLocksMu.Unlock()
	execLocks[executionID] = &sync.Mutex{}
}

// UnregisterExecLock removes the mutex entry. Called on unschedule and when
// the execution is deleted.
func UnregisterExecLock(executionID string) {
	execLocksMu.Lock()
	defer execLocksMu.Unlock()
	delete(execLocks, executionID)
}
+
+// applyConsidersLocal applies the considers update directly for a confirmed
+// booking or purchase (bypasses NATS since updateExecutionState resolves the
+// execution from the resource itself).
+func applyConsidersLocal(id string, dt tools.DataType) {
+ payload, err := json.Marshal(&executionConsidersPayload{ID: id})
+ if err != nil {
+ return
+ }
+ updateExecutionState(payload, dt)
+}
+
// EmitConsidersExecution broadcasts a Considers / WORKFLOW_EXECUTION message to all
// storage and compute peers of wf once the execution has transitioned to SCHEDULED.
// Each receiving peer will use it to confirm (IsDraft=false) their local drafts.
//
// NOTE(review): every failure path here returns silently (peer lookup and both
// marshals) — confirm whether a log line is wanted, since a dropped broadcast
// leaves remote drafts unconfirmed.
func EmitConsidersExecution(exec *workflow_execution.WorkflowExecution, wf *workflow.Workflow) {
	if wf == nil || wf.Graph == nil {
		return
	}
	peerIDs, err := GetWorkflowPeerIDs(wf.GetID(), &tools.APIRequest{Admin: true})
	if err != nil {
		return
	}
	if len(peerIDs) == 0 {
		return
	}
	// Inner payload: identifies the execution; ID and ExecutionID carry the
	// same value for WORKFLOW_EXECUTION considers messages.
	payload, err := json.Marshal(executionConsidersPayload{
		ID:           exec.GetID(),
		ExecutionID:  exec.GetID(),
		ExecutionsID: exec.ExecutionsID,
		PeerIDs:      peerIDs})
	if err != nil {
		return
	}
	// Outer envelope: a PB_CONSIDERS propagation message.
	b, err := json.Marshal(tools.PropalgationMessage{
		DataType: int(tools.WORKFLOW_EXECUTION),
		Action:   tools.PB_CONSIDERS,
		Payload:  payload,
	})
	if err != nil {
		return
	}
	tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
		FromApp:  "oc-scheduler",
		Datatype: tools.WORKFLOW_EXECUTION,
		Method:   int(tools.PROPALGATION_EVENT),
		Payload:  b,
	})
}
+
+// updateExecutionState sets BookingsState[id]=true (dt==BOOKING) or
+// PurchasesState[id]=true (dt==PURCHASE_RESOURCE) on the target execution.
+// payload must be JSON-encoded {"id":"...", "execution_id":"..."}.
+func updateExecutionState(payload []byte, dt tools.DataType) {
+ var data executionConsidersPayload
+ if err := json.Unmarshal(payload, &data); err != nil || data.ID == "" {
+ return
+ }
+ schdata := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).LoadOne(data.ID)
+ if schdata.Data == nil {
+ return
+ }
+ sch := scheduling.ToSchedulerObject(dt, schdata.Data)
+ if sch == nil {
+ return
+ }
+ execID := sch.GetExecutionId()
+
+ execLocksMu.RLock()
+ mu := execLocks[execID]
+ execLocksMu.RUnlock()
+ if mu == nil {
+ fmt.Printf("updateExecutionState: no lock for execution %s, skipping\n", execID)
+ return
+ }
+ mu.Lock()
+ defer mu.Unlock()
+
+ adminReq := &tools.APIRequest{Admin: true}
+ res, _, err := workflow_execution.NewAccessor(adminReq).LoadOne(execID)
+ if err != nil || res == nil {
+ fmt.Printf("updateExecutionState: could not load execution %s: %v\n", data.ExecutionID, err)
+ return
+ }
+
+ exec := res.(*workflow_execution.WorkflowExecution)
+ fmt.Println("sch.GetExecutionId()", data.ID, exec.BookingsState)
+
+ switch dt {
+ case tools.BOOKING:
+ if exec.BookingsState == nil {
+ exec.BookingsState = map[string]bool{}
+ }
+ exec.BookingsState[data.ID] = true
+ fmt.Println("sch.GetExecutionId()", data.ID)
+
+ case tools.PURCHASE_RESOURCE:
+ if exec.PurchasesState == nil {
+ exec.PurchasesState = map[string]bool{}
+ }
+ exec.PurchasesState[data.ID] = true
+ }
+ allConfirmed := true
+ for _, st := range exec.BookingsState {
+ if !st {
+ allConfirmed = false
+ break
+ }
+ }
+ for _, st := range exec.PurchasesState {
+ if !st {
+ allConfirmed = false
+ break
+ }
+ }
+ if allConfirmed {
+ exec.State = enum.SCHEDULED
+ exec.IsDraft = false
+ }
+ if _, _, err := utils.GenericRawUpdateOne(exec, exec.GetID(), workflow_execution.NewAccessor(adminReq)); err != nil {
+ fmt.Printf("updateExecutionState: could not update execution %s: %v\n", sch.GetExecutionId(), err)
+ return
+ }
+ if allConfirmed {
+ // Confirm the order and notify all peers that execution is scheduled.
+ go confirmSessionOrder(exec.ExecutionsID, adminReq)
+ obj, _, err := workflow.NewAccessor(adminReq).LoadOne(exec.WorkflowID)
+ if err == nil && obj != nil {
+ go EmitConsidersExecution(exec, obj.(*workflow.Workflow))
+ }
+ }
+}
+
// confirmExecutionDrafts is called when a Considers/WORKFLOW_EXECUTION message
// is received from oc-discovery, meaning the originating peer has confirmed the
// execution as SCHEDULED. For every booking and purchase ID listed in the
// execution's states, we confirm the local draft (IsDraft=false).
func confirmExecutionDrafts(payload []byte) {
	var data executionConsidersPayload
	if err := json.Unmarshal(payload, &data); err != nil {
		fmt.Printf("confirmExecutionDrafts: could not parse payload: %v\n", err)
		return
	}
	// A missing / non-execution document is silently ignored (nil conversion).
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.WORKFLOW_EXECUTION), nil)
	d := access.LoadOne(data.ExecutionID)
	if exec := d.ToWorkflowExecution(); exec != nil {
		// Fire-and-forget: each draft is confirmed in its own goroutine.
		// NOTE(review): these goroutines are unsupervised — confirm that
		// confirmResource tolerates being interrupted by process shutdown.
		for id := range exec.BookingsState {
			go confirmResource(id, tools.BOOKING)
		}
		for id := range exec.PurchasesState {
			go confirmResource(id, tools.PURCHASE_RESOURCE)
		}
	}
}
diff --git a/infrastructure/nats.go b/infrastructure/nats.go
index 1b48d6a..8ca421b 100644
--- a/infrastructure/nats.go
+++ b/infrastructure/nats.go
@@ -5,155 +5,18 @@ import (
"encoding/json"
"fmt"
"oc-scheduler/conf"
- "slices"
"sync"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/config"
"cloud.o-forge.io/core/oc-lib/models/booking"
- "cloud.o-forge.io/core/oc-lib/models/booking/planner"
- "cloud.o-forge.io/core/oc-lib/models/common/enum"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
- "cloud.o-forge.io/core/oc-lib/models/workflow"
- "cloud.o-forge.io/core/oc-lib/models/workflow/graph"
- "cloud.o-forge.io/core/oc-lib/models/workflow_execution"
"cloud.o-forge.io/core/oc-lib/tools"
"github.com/nats-io/nats.go"
)
-const plannerTTL = 24 * time.Hour
-
-// ---------------------------------------------------------------------------
-// Planner cache — protected by plannerMu
-// ---------------------------------------------------------------------------
-
-// plannerEntry wraps a planner snapshot with refresh-ownership tracking.
-// At most one check session may be the "refresh owner" of a given peer's
-// planner at a time: it emits PB_PLANNER to request a fresh snapshot from
-// oc-discovery and, on close (clean or forced), emits PB_CLOSE_PLANNER to
-// release the stream. Any subsequent session that needs the same peer's
-// planner will see Refreshing=true and skip the duplicate request.
-type plannerEntry struct {
- Planner *planner.Planner
- Refreshing bool // true while a PB_PLANNER request is in flight
- RefreshOwner string // session UUID that initiated the current refresh
-}
-
-var plannerMu sync.RWMutex
-var PlannerCache = map[string]*plannerEntry{}
-var plannerAddedAt = map[string]time.Time{} // peerID → first-seen timestamp
-
-// ---------------------------------------------------------------------------
-// Subscriber registries — one keyed by peerID, one by workflowID
-// ---------------------------------------------------------------------------
-
-var subsMu sync.RWMutex
-var plannerSubs = map[string][]chan struct{}{} // peerID → notification channels
-var workflowSubs = map[string][]chan struct{}{} // workflowID → notification channels
-
-// SubscribePlannerUpdates registers interest in planner changes for the given
-// peer IDs. The returned channel receives one struct{} (non-blocking) each time
-// any of those planners is updated. Call cancel to unregister.
-func SubscribePlannerUpdates(peerIDs []string) (<-chan struct{}, func()) {
- return subscribe(&subsMu, plannerSubs, peerIDs)
-}
-
-// SubscribeWorkflowUpdates registers interest in workflow modifications for the
-// given workflow ID. The returned channel is signalled when the workflow changes
-// (peer list may have grown or shrunk). Call cancel to unregister.
-func SubscribeWorkflowUpdates(wfID string) (<-chan struct{}, func()) {
- ch, cancel := subscribe(&subsMu, workflowSubs, []string{wfID})
- return ch, cancel
-}
-
-// subscribe is the generic helper used by both registries.
-func subscribe(mu *sync.RWMutex, registry map[string][]chan struct{}, keys []string) (<-chan struct{}, func()) {
- ch := make(chan struct{}, 1)
- mu.Lock()
- for _, k := range keys {
- registry[k] = append(registry[k], ch)
- }
- mu.Unlock()
- cancel := func() {
- mu.Lock()
- for _, k := range keys {
- subs := registry[k]
- for i, s := range subs {
- if s == ch {
- registry[k] = append(subs[:i], subs[i+1:]...)
- break
- }
- }
- }
- mu.Unlock()
- }
- return ch, cancel
-}
-
-func notifyPlannerWatchers(peerID string) {
- notify(&subsMu, plannerSubs, peerID)
-}
-
-func notifyWorkflowWatchers(wfID string) {
- notify(&subsMu, workflowSubs, wfID)
-}
-
-func notify(mu *sync.RWMutex, registry map[string][]chan struct{}, key string) {
- mu.RLock()
- subs := registry[key]
- mu.RUnlock()
- for _, ch := range subs {
- select {
- case ch <- struct{}{}:
- default:
- }
- }
-}
-
-// ---------------------------------------------------------------------------
-// Cache helpers
-// ---------------------------------------------------------------------------
-
-// storePlanner inserts or updates the planner snapshot for peerID.
-// On first insertion it schedules an automatic eviction after plannerTTL.
-// Existing refresh-ownership state (Refreshing / RefreshOwner) is preserved
-// so that an in-flight request is not inadvertently reset.
-// All subscribers interested in this peer are notified.
-func storePlanner(peerID string, p *planner.Planner) {
- plannerMu.Lock()
- entry := PlannerCache[peerID]
- isNew := entry == nil
- if isNew {
- entry = &plannerEntry{}
- PlannerCache[peerID] = entry
- plannerAddedAt[peerID] = time.Now()
- go evictAfter(peerID, plannerTTL)
- }
- entry.Planner = p
- plannerMu.Unlock()
- notifyPlannerWatchers(peerID)
-}
-
-// evictAfter waits ttl from first insertion then deletes the cache entry and
-// emits PB_CLOSE_PLANNER so oc-discovery stops streaming for this peer.
-// This is the only path that actually removes an entry from PlannerCache;
-// session close (ReleaseRefreshOwnership) only resets ownership state.
-func evictAfter(peerID string, ttl time.Duration) {
- time.Sleep(ttl)
- plannerMu.Lock()
- _, exists := PlannerCache[peerID]
- if exists {
- delete(PlannerCache, peerID)
- delete(plannerAddedAt, peerID)
- }
- plannerMu.Unlock()
- if exists {
- EmitNATS(peerID, tools.PropalgationMessage{Action: tools.PB_CLOSE_PLANNER})
- }
-}
-
// ---------------------------------------------------------------------------
// NATS emission
// ---------------------------------------------------------------------------
@@ -174,275 +37,48 @@ func EmitNATS(peerID string, message tools.PropalgationMessage) {
})
}
-type executionConsidersPayload struct {
- ID string `json:"id"`
- ExecutionsID string `json:"executions_id"`
- ExecutionID string `json:"execution_id"`
- PeerIDs []string `json:"peer_ids"`
-}
-
-// emitConsiders broadcasts a PROPALGATION_EVENT with the Considers action,
-// carrying the stored resource ID and its datatype (BOOKING or PURCHASE_RESOURCE).
-func emitConsiders(id string, executionID string, dt tools.DataType) {
- access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil)
- data := access.LoadOne(executionID)
- if data.ToWorkflowExecution() != nil {
- exec := data.ToWorkflowExecution()
- if peers, err := GetWorkflowPeerIDs(exec.WorkflowID, &tools.APIRequest{Admin: true}); err == nil {
- payload, _ := json.Marshal(&executionConsidersPayload{
- ID: id,
- ExecutionsID: exec.ExecutionsID,
- ExecutionID: executionID,
- PeerIDs: peers,
- })
- b, _ := json.Marshal(tools.PropalgationMessage{
- DataType: int(dt),
- Action: tools.PB_CONSIDERS,
- Payload: payload,
- })
- tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
- FromApp: "oc-scheduler",
- Datatype: dt,
- Method: int(tools.PROPALGATION_EVENT),
- Payload: b,
- })
- }
- }
-}
-
-// EmitConsidersExecution broadcasts a Considers / WORKFLOW_EXECUTION message to all
-// storage and compute peers of wf once the execution has transitioned to SCHEDULED.
-// Each receiving peer will use it to confirm (IsDraft=false) their local drafts.
-func EmitConsidersExecution(exec *workflow_execution.WorkflowExecution, wf *workflow.Workflow) {
- if wf == nil || wf.Graph == nil {
- return
- }
- peerIDs, err := GetWorkflowPeerIDs(wf.GetID(), &tools.APIRequest{Admin: true})
- if err != nil {
- return
- }
- if len(peerIDs) == 0 {
- return
- }
- payload, err := json.Marshal(executionConsidersPayload{
- ID: exec.GetID(),
- ExecutionID: exec.GetID(),
- ExecutionsID: exec.ExecutionsID,
- PeerIDs: peerIDs})
- if err != nil {
- return
- }
- b, err := json.Marshal(tools.PropalgationMessage{
- DataType: int(tools.WORKFLOW_EXECUTION),
- Action: tools.PB_CONSIDERS,
- Payload: payload,
- })
- if err != nil {
- return
- }
- tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
- FromApp: "oc-scheduler",
- Datatype: tools.WORKFLOW_EXECUTION,
- Method: int(tools.PROPALGATION_EVENT),
- Payload: b,
- })
-}
-
-// updateExecutionState sets BookingsState[id]=true (dt==BOOKING) or
-// PurchasesState[id]=true (dt==PURCHASE_RESOURCE) on the target execution.
-// payload must be JSON-encoded {"id":"...", "execution_id":"..."}.
-func updateExecutionState(payload []byte, dt tools.DataType) {
- var data executionConsidersPayload
- if err := json.Unmarshal(payload, &data); err != nil || data.ID == "" || data.ExecutionID == "" {
- return
- }
- adminReq := &tools.APIRequest{Admin: true}
- res, _, err := workflow_execution.NewAccessor(adminReq).LoadOne(data.ExecutionID)
- if err != nil || res == nil {
- fmt.Printf("updateExecutionState: could not load execution %s: %v\n", data.ExecutionID, err)
- return
- }
- exec := res.(*workflow_execution.WorkflowExecution)
- switch dt {
- case tools.BOOKING:
- if exec.BookingsState == nil {
- exec.BookingsState = map[string]bool{}
- }
- exec.BookingsState[data.ID] = true
- case tools.PURCHASE_RESOURCE:
- if exec.PurchasesState == nil {
- exec.PurchasesState = map[string]bool{}
- }
- exec.PurchasesState[data.ID] = true
- }
- found := true
- for _, st := range exec.BookingsState {
- if !st {
- found = false
- break
- }
- }
- for _, st := range exec.PurchasesState {
- if !st {
- found = false
- break
- }
- }
- if found {
- exec.State = enum.SCHEDULED
- }
- if _, _, err := utils.GenericRawUpdateOne(exec, data.ExecutionID, workflow_execution.NewAccessor(adminReq)); err != nil {
- fmt.Printf("updateExecutionState: could not update execution %s: %v\n", data.ExecutionID, err)
- }
-}
-
-// confirmExecutionDrafts is called when a Considers/WORKFLOW_EXECUTION message
-// is received from oc-discovery, meaning the originating peer has confirmed the
-// execution as SCHEDULED. For every booking and purchase ID listed in the
-// execution's states, we confirm the local draft (IsDraft=false).
-func confirmExecutionDrafts(payload []byte) {
- var data executionConsidersPayload
- if err := json.Unmarshal(payload, &data); err != nil {
- fmt.Printf("confirmExecutionDrafts: could not parse payload: %v\n", err)
- return
- }
- access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.WORKFLOW_EXECUTION), nil)
- d := access.LoadOne(data.ExecutionID)
- if exec := d.ToWorkflowExecution(); exec != nil {
- for id := range exec.BookingsState {
- go confirmResource(id, tools.BOOKING)
- }
- for id := range exec.PurchasesState {
- go confirmResource(id, tools.PURCHASE_RESOURCE)
- }
- }
-}
-
// ---------------------------------------------------------------------------
// NATS listeners
// ---------------------------------------------------------------------------
func ListenNATS() {
tools.NewNATSCaller().ListenNats(map[tools.NATSMethod]func(tools.NATSResponse){
- // Receive planner snapshots pushed by oc-discovery and cache them.
- // Considers messages:
- // BOOKING / PURCHASE_RESOURCE → mark the individual resource as
- // considered in the target WorkflowExecution (BookingsState / PurchasesState).
- // WORKFLOW_EXECUTION → the execution reached SCHEDULED; confirm all
- // local draft bookings and purchases listed in its states.
- tools.PLANNER_EXECUTION: func(resp tools.NATSResponse) {
- m := map[string]interface{}{}
- p := planner.Planner{}
- if err := json.Unmarshal(resp.Payload, &m); err != nil {
- return
- }
- if err := json.Unmarshal(resp.Payload, &p); err != nil {
- return
- }
- storePlanner(fmt.Sprintf("%v", m["peer_id"]), &p)
- },
- tools.PROPALGATION_EVENT: func(resp tools.NATSResponse) {
- if resp.FromApp != "oc-discovery" {
- return
- }
- var prop tools.PropalgationMessage
- if err := json.Unmarshal(resp.Payload, &prop); err != nil {
- return
- }
- switch prop.Action {
- case tools.PB_CONSIDERS:
- switch tools.DataType(prop.DataType) {
- case tools.BOOKING, tools.PURCHASE_RESOURCE:
- updateExecutionState(prop.Payload, tools.DataType(prop.DataType))
- case tools.WORKFLOW_EXECUTION:
- confirmExecutionDrafts(prop.Payload)
- }
- }
- },
-
- // Incoming resource creation events:
- // - WORKFLOW → refresh peer planner entries and notify CheckStream watchers.
- // - BOOKING → if destined for us, validate, store as draft, start 10-min
- // expiry timer, and emit a "considers_booking" response.
- // - PURCHASE → if destined for us, store as draft, start 10-min expiry
- // timer, and emit a "considers_purchase" response.
- tools.REMOVE_RESOURCE: func(resp tools.NATSResponse) {
- switch resp.Datatype {
- case tools.WORKFLOW:
- wf := workflow.Workflow{}
- if err := json.Unmarshal(resp.Payload, &wf); err != nil {
- return
- }
- notifyWorkflowWatchers(wf.GetID())
- }
- },
- tools.CREATE_RESOURCE: func(resp tools.NATSResponse) {
- switch resp.Datatype {
- case tools.WORKFLOW:
- wf := workflow.Workflow{}
- if err := json.Unmarshal(resp.Payload, &wf); err != nil {
- return
- }
- broadcastPlanner(&wf)
- notifyWorkflowWatchers(wf.GetID())
- case tools.BOOKING:
- var bk booking.Booking
- if err := json.Unmarshal(resp.Payload, &bk); err != nil {
- return
- }
- self, err := oclib.GetMySelf()
- if err != nil || self == nil || bk.DestPeerID != self.GetID() {
- return
- }
- // Reject bookings whose start date is already in the past.
- if !bk.ExpectedStartDate.IsZero() && bk.ExpectedStartDate.Before(time.Now()) {
- fmt.Println("ListenNATS: booking start date is in the past, discarding")
- return
- }
- // Verify the slot is free in our planner (if we have one).
- plannerMu.RLock()
- selfEntry := PlannerCache[self.PeerID]
- plannerMu.RUnlock()
- if selfEntry != nil && selfEntry.Planner != nil && !checkInstance(selfEntry.Planner, bk.ResourceID, bk.InstanceID, bk.ExpectedStartDate, bk.ExpectedEndDate) {
- fmt.Println("ListenNATS: booking conflicts with local planner, discarding")
- return
- }
- adminReq := &tools.APIRequest{Admin: true}
- bk.IsDraft = true
- stored, _, err := booking.NewAccessor(adminReq).StoreOne(&bk)
- if err != nil {
- fmt.Println("ListenNATS: could not store booking:", err)
- return
- }
- storedID := stored.GetID()
- go refreshSelfPlanner(self.PeerID, adminReq)
- time.AfterFunc(10*time.Minute, func() { draftTimeout(storedID, tools.BOOKING) })
- go emitConsiders(storedID, stored.(*booking.Booking).ExecutionID, tools.BOOKING)
-
- case tools.PURCHASE_RESOURCE:
- var pr purchase_resource.PurchaseResource
- if err := json.Unmarshal(resp.Payload, &pr); err != nil {
- return
- }
- self, err := oclib.GetMySelf()
- if err != nil || self == nil || pr.DestPeerID != self.GetID() {
- return
- }
- adminReq := &tools.APIRequest{Admin: true}
- pr.IsDraft = true
- stored, _, err := purchase_resource.NewAccessor(adminReq).StoreOne(&pr)
- if err != nil {
- fmt.Println("ListenNATS: could not store purchase:", err)
- return
- }
- storedID := stored.GetID()
- time.AfterFunc(10*time.Minute, func() { draftTimeout(storedID, tools.PURCHASE_RESOURCE) })
- go emitConsiders(storedID, stored.(*purchase_resource.PurchaseResource).ExecutionID, tools.PURCHASE_RESOURCE)
- }
- },
+ tools.PLANNER_EXECUTION: handlePlannerExecution,
+ tools.PROPALGATION_EVENT: handlePropagationEvent,
+ tools.REMOVE_RESOURCE: handleRemoveResource,
+ tools.CREATE_RESOURCE: handleCreateResource,
})
}
+// ---------------------------------------------------------------------------
+// Confirm channels
+// ---------------------------------------------------------------------------
+
+// ListenConfirm opens a direct NATS connection and subscribes to the hardcoded
+// "confirm_booking" and "confirm_purchase" subjects. It reconnects automatically
+// if the connection is lost.
+func ListenConfirm() {
+ natsURL := config.GetConfig().NATSUrl
+ if natsURL == "" {
+ fmt.Println("ListenConfirm: NATS_SERVER not set, skipping confirm listeners")
+ return
+ }
+ for {
+ nc, err := nats.Connect(natsURL)
+ if err != nil {
+ fmt.Println("ListenConfirm: could not connect to NATS:", err)
+ time.Sleep(time.Minute)
+ continue
+ }
+ var wg sync.WaitGroup
+ wg.Add(2)
+ go listenConfirmChannel(nc, "confirm_booking", tools.BOOKING, &wg)
+ go listenConfirmChannel(nc, "confirm_purchase", tools.PURCHASE_RESOURCE, &wg)
+ wg.Wait()
+ nc.Close()
+ }
+}
+
// ---------------------------------------------------------------------------
// Draft timeout
// ---------------------------------------------------------------------------
@@ -474,254 +110,9 @@ func draftTimeout(id string, dt tools.DataType) {
}
// ---------------------------------------------------------------------------
-// Confirm channels
+// Kubernetes namespace helper
// ---------------------------------------------------------------------------
-// confirmResource sets IsDraft=false for a booking or purchase resource.
-// For bookings it also advances State to SCHEDULED and refreshes the local planner.
-func confirmResource(id string, dt tools.DataType) {
- adminReq := &tools.APIRequest{Admin: true}
- switch dt {
- case tools.BOOKING:
- res, _, err := booking.NewAccessor(adminReq).LoadOne(id)
- if err != nil || res == nil {
- fmt.Printf("confirmResource: could not load booking %s: %v\n", id, err)
- return
- }
- bk := res.(*booking.Booking)
- bk.IsDraft = false
- bk.State = enum.SCHEDULED
- if _, _, err := utils.GenericRawUpdateOne(bk, id, booking.NewAccessor(adminReq)); err != nil {
- fmt.Printf("confirmResource: could not confirm booking %s: %v\n", id, err)
- return
- }
- createNamespace(bk.ExecutionsID) // create Namespace locally
- self, err := oclib.GetMySelf()
- if err == nil && self != nil {
- go refreshSelfPlanner(self.PeerID, adminReq)
- }
- case tools.PURCHASE_RESOURCE:
- res, _, err := purchase_resource.NewAccessor(adminReq).LoadOne(id)
- if err != nil || res == nil {
- fmt.Printf("confirmResource: could not load purchase %s: %v\n", id, err)
- return
- }
- pr := res.(*purchase_resource.PurchaseResource)
- pr.IsDraft = false
- if _, _, err := utils.GenericRawUpdateOne(pr, id, purchase_resource.NewAccessor(adminReq)); err != nil {
- fmt.Printf("confirmResource: could not confirm purchase %s: %v\n", id, err)
- }
- }
-}
-
-// listenConfirmChannel subscribes to a NATS subject and calls confirmResource
-// for each message received. The message body is expected to be the plain
-// resource ID (UTF-8 string).
-func listenConfirmChannel(nc *nats.Conn, subject string, dt tools.DataType, wg *sync.WaitGroup) {
- defer wg.Done()
- ch := make(chan *nats.Msg, 64)
- sub, err := nc.ChanSubscribe(subject, ch)
- if err != nil {
- fmt.Printf("listenConfirmChannel: could not subscribe to %s: %v\n", subject, err)
- return
- }
- defer sub.Unsubscribe()
- for msg := range ch {
- confirmResource(string(msg.Data), dt)
- }
-}
-
-// ListenConfirm opens a direct NATS connection and subscribes to the hardcoded
-// "confirm_booking" and "confirm_purchase" subjects. It reconnects automatically
-// if the connection is lost.
-func ListenConfirm() {
- natsURL := config.GetConfig().NATSUrl
- if natsURL == "" {
- fmt.Println("ListenConfirm: NATS_SERVER not set, skipping confirm listeners")
- return
- }
- for {
- nc, err := nats.Connect(natsURL)
- if err != nil {
- fmt.Println("ListenConfirm: could not connect to NATS:", err)
- time.Sleep(time.Minute)
- continue
- }
- var wg sync.WaitGroup
- wg.Add(2)
- go listenConfirmChannel(nc, "confirm_booking", tools.BOOKING, &wg)
- go listenConfirmChannel(nc, "confirm_purchase", tools.PURCHASE_RESOURCE, &wg)
- wg.Wait()
- nc.Close()
- }
-}
-
-// ---------------------------------------------------------------------------
-// Self-planner initialisation
-// ---------------------------------------------------------------------------
-
-// InitSelfPlanner bootstraps our own planner entry at startup.
-// It waits (with 15-second retries) for our peer record to be present in the
-// database before generating the first planner snapshot and broadcasting it
-// on PB_PLANNER. This handles the race between oc-scheduler starting before
-// oc-peer has fully registered our node.
-func InitSelfPlanner() {
- for {
- self, err := oclib.GetMySelf()
- if err != nil || self == nil {
- fmt.Println("InitSelfPlanner: self peer not found yet, retrying in 15s...")
- time.Sleep(15 * time.Second)
- continue
- }
- refreshSelfPlanner(self.PeerID, &tools.APIRequest{Admin: true})
- return
- }
-}
-
-// ---------------------------------------------------------------------------
-// Self-planner refresh
-// ---------------------------------------------------------------------------
-
-// refreshSelfPlanner regenerates the local planner from the current state of
-// the booking DB, stores it in PlannerCache under our own node UUID, and
-// broadcasts it on PROPALGATION_EVENT / PB_PLANNER so all listeners (including
-// oc-discovery) are kept in sync.
-//
-// It should be called whenever a booking for our own peer is created, whether
-// by direct DB insertion (self-peer routing) or upon receiving a CREATE_RESOURCE
-// BOOKING message from oc-discovery.
-func refreshSelfPlanner(peerID string, request *tools.APIRequest) {
- p, err := planner.GenerateShallow(request)
- if err != nil {
- fmt.Println("refreshSelfPlanner: could not generate planner:", err)
- return
- }
-
- // Update the local cache and notify any waiting CheckStream goroutines.
- storePlanner(peerID, p)
-
- // Broadcast the updated planner so remote peers (and oc-discovery) can
- // refresh their view of our availability.
- type plannerWithPeer struct {
- PeerID string `json:"peer_id"`
- *planner.Planner
- }
- plannerPayload, err := json.Marshal(plannerWithPeer{PeerID: peerID, Planner: p})
- if err != nil {
- return
- }
- EmitNATS(peerID, tools.PropalgationMessage{
- Action: tools.PB_PLANNER,
- Payload: plannerPayload,
- })
-}
-
-// ---------------------------------------------------------------------------
-// Planner broadcast
-// ---------------------------------------------------------------------------
-
-// RequestPlannerRefresh asks oc-discovery for a fresh planner snapshot for
-// each peer in peerIDs. Only the first session to request a given peer becomes
-// its "refresh owner": subsequent sessions see Refreshing=true and skip the
-// duplicate PB_PLANNER emission. Returns the subset of peerIDs for which this
-// session claimed ownership (needed to release on close).
-func RequestPlannerRefresh(peerIDs []string, sessionID string) []string {
- var owned []string
- for _, peerID := range peerIDs {
- plannerMu.Lock()
- entry := PlannerCache[peerID]
- if entry == nil {
- entry = &plannerEntry{}
- PlannerCache[peerID] = entry
- plannerAddedAt[peerID] = time.Now()
- go evictAfter(peerID, plannerTTL)
- }
- shouldRequest := !entry.Refreshing
- if shouldRequest {
- entry.Refreshing = true
- entry.RefreshOwner = sessionID
- }
- plannerMu.Unlock()
- if shouldRequest {
- owned = append(owned, peerID)
- payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
- EmitNATS(peerID, tools.PropalgationMessage{
- Action: tools.PB_PLANNER,
- Payload: payload,
- })
- }
- }
- return owned
-}
-
-// ReleaseRefreshOwnership is called when a check session closes (clean or
-// forced). For each peer this session owns, it resets the refresh state and
-// emits PB_CLOSE_PLANNER so oc-discovery stops the planner stream.
-// The planner data itself stays in the cache until TTL eviction.
-func ReleaseRefreshOwnership(peerIDs []string, sessionID string) {
- for _, peerID := range peerIDs {
- plannerMu.Lock()
- if entry := PlannerCache[peerID]; entry != nil && entry.RefreshOwner == sessionID {
- entry.Refreshing = false
- entry.RefreshOwner = ""
- }
- plannerMu.Unlock()
- payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
- EmitNATS(peerID, tools.PropalgationMessage{
- Action: tools.PB_CLOSE_PLANNER,
- Payload: payload,
- })
- }
-}
-
-// broadcastPlanner iterates the storage and compute peers of the given workflow
-// and, for each peer not yet in the cache, emits a PB_PLANNER propagation so
-// downstream consumers (oc-discovery, other schedulers) refresh their state.
-func broadcastPlanner(wf *workflow.Workflow) {
- if wf.Graph == nil {
- return
- }
- items := []graph.GraphItem{}
- items = append(items, wf.GetGraphItems(wf.Graph.IsStorage)...)
- items = append(items, wf.GetGraphItems(wf.Graph.IsCompute)...)
-
- seen := []string{}
- for _, item := range items {
- i := item
- _, res := i.GetResource()
- if res == nil {
- continue
- }
- creatorID := res.GetCreatorID()
- if slices.Contains(seen, creatorID) {
- continue
- }
-
- data := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).LoadOne(creatorID)
- p := data.ToPeer()
- if p == nil {
- continue
- }
-
- plannerMu.RLock()
- cached := PlannerCache[p.PeerID]
- plannerMu.RUnlock()
-
- // Only request if no snapshot and no refresh already in flight.
- if cached == nil || (cached.Planner == nil && !cached.Refreshing) {
- payload, err := json.Marshal(map[string]interface{}{"peer_id": p.PeerID})
- if err != nil {
- continue
- }
- seen = append(seen, creatorID)
- EmitNATS(p.PeerID, tools.PropalgationMessage{
- Action: tools.PB_PLANNER,
- Payload: payload,
- })
- }
- }
-}
-
func createNamespace(ns string) error {
/*
* This function is used to create a namespace.
diff --git a/infrastructure/nats_handlers.go b/infrastructure/nats_handlers.go
new file mode 100644
index 0000000..0847216
--- /dev/null
+++ b/infrastructure/nats_handlers.go
@@ -0,0 +1,274 @@
+package infrastructure
+
+import (
+ "encoding/json"
+ "fmt"
+ "sync"
+ "time"
+
+ oclib "cloud.o-forge.io/core/oc-lib"
+ "cloud.o-forge.io/core/oc-lib/models/booking"
+ "cloud.o-forge.io/core/oc-lib/models/booking/planner"
+ "cloud.o-forge.io/core/oc-lib/models/common/enum"
+ "cloud.o-forge.io/core/oc-lib/models/peer"
+ "cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
+ "cloud.o-forge.io/core/oc-lib/models/utils"
+ "cloud.o-forge.io/core/oc-lib/models/workflow"
+ "cloud.o-forge.io/core/oc-lib/tools"
+ "github.com/nats-io/nats.go"
+)
+
+func handlePlannerExecution(resp tools.NATSResponse) {
+ m := map[string]interface{}{}
+ p := planner.Planner{}
+ if err := json.Unmarshal(resp.Payload, &m); err != nil {
+ return
+ }
+ if err := json.Unmarshal(resp.Payload, &p); err != nil {
+ return
+ }
+ storePlanner(fmt.Sprintf("%v", m["peer_id"]), &p)
+}
+
+func handlePropagationEvent(resp tools.NATSResponse) {
+ if resp.FromApp != "oc-discovery" {
+ return
+ }
+ var prop tools.PropalgationMessage
+ if err := json.Unmarshal(resp.Payload, &prop); err != nil {
+ return
+ }
+ switch prop.Action {
+ case tools.PB_CONSIDERS:
+ fmt.Println("PB_CONSIDERS")
+ switch tools.DataType(prop.DataType) {
+ case tools.BOOKING, tools.PURCHASE_RESOURCE:
+ fmt.Println("updateExecutionState", tools.DataType(prop.DataType))
+ updateExecutionState(prop.Payload, tools.DataType(prop.DataType))
+ case tools.WORKFLOW_EXECUTION:
+ confirmExecutionDrafts(prop.Payload)
+ }
+ }
+}
+
+func handleRemoveResource(resp tools.NATSResponse) {
+ switch resp.Datatype {
+ case tools.WORKFLOW:
+ wf := workflow.Workflow{}
+ if err := json.Unmarshal(resp.Payload, &wf); err != nil {
+ return
+ }
+ notifyWorkflowWatchers(wf.GetID())
+ case tools.BOOKING:
+ var p removeResourcePayload
+ if err := json.Unmarshal(resp.Payload, &p); err != nil {
+ return
+ }
+ self, err := oclib.GetMySelf()
+ if err != nil || self == nil {
+ return
+ }
+ adminReq := &tools.APIRequest{Admin: true}
+ res, _, loadErr := booking.NewAccessor(adminReq).LoadOne(p.ID)
+ if loadErr != nil || res == nil {
+ return
+ }
+ existing := res.(*booking.Booking)
+ if existing.SchedulerPeerID != p.SchedulerPeerID || existing.ExecutionsID != p.ExecutionsID {
+ fmt.Println("ListenNATS REMOVE_RESOURCE booking: auth mismatch, ignoring", p.ID)
+ return
+ }
+ booking.NewAccessor(adminReq).DeleteOne(p.ID)
+ go refreshSelfPlanner(self.PeerID, adminReq)
+ case tools.PURCHASE_RESOURCE:
+ var p removeResourcePayload
+ if err := json.Unmarshal(resp.Payload, &p); err != nil {
+ return
+ }
+ adminReq := &tools.APIRequest{Admin: true}
+ res, _, loadErr := purchase_resource.NewAccessor(adminReq).LoadOne(p.ID)
+ if loadErr != nil || res == nil {
+ return
+ }
+ existing := res.(*purchase_resource.PurchaseResource)
+ if existing.SchedulerPeerID != p.SchedulerPeerID || existing.ExecutionsID != p.ExecutionsID {
+ fmt.Println("ListenNATS REMOVE_RESOURCE purchase: auth mismatch, ignoring", p.ID)
+ return
+ }
+ purchase_resource.NewAccessor(adminReq).DeleteOne(p.ID)
+ }
+}
+
+func handleCreateBooking(bk *booking.Booking, self *peer.Peer, adminReq *tools.APIRequest) {
+ // Upsert: if a booking with this ID already exists, verify auth and update.
+ if existing, _, loadErr := booking.NewAccessor(adminReq).LoadOne(bk.GetID()); loadErr == nil && existing != nil {
+ prev := existing.(*booking.Booking)
+ if prev.SchedulerPeerID != bk.SchedulerPeerID || prev.ExecutionsID != bk.ExecutionsID {
+ fmt.Println("ListenNATS CREATE_RESOURCE booking upsert: auth mismatch, ignoring", bk.GetID())
+ return
+ }
+ if !prev.IsDrafted() && bk.IsDraft {
+ // Already confirmed, refuse downgrade.
+ return
+ }
+ // Expired check only on confirmation (IsDraft→false).
+ if !bk.IsDraft && !prev.ExpectedStartDate.IsZero() && prev.ExpectedStartDate.Before(time.Now()) {
+ fmt.Println("ListenNATS CREATE_RESOURCE booking: expired, deleting", bk.GetID())
+ booking.NewAccessor(adminReq).DeleteOne(bk.GetID())
+ return
+ }
+ if _, _, err := utils.GenericRawUpdateOne(bk, bk.GetID(), booking.NewAccessor(adminReq)); err != nil {
+ fmt.Println("ListenNATS CREATE_RESOURCE booking update failed:", err)
+ return
+ }
+ go refreshSelfPlanner(self.PeerID, adminReq)
+ if !bk.IsDraft {
+ go applyConsidersLocal(bk.GetID(), tools.BOOKING)
+ }
+ return
+ }
+ // New booking: standard create flow.
+ if !bk.ExpectedStartDate.IsZero() && bk.ExpectedStartDate.Before(time.Now()) {
+ fmt.Println("ListenNATS: booking start date is in the past, discarding")
+ return
+ }
+ plannerMu.RLock()
+ selfEntry := PlannerCache[self.PeerID]
+ plannerMu.RUnlock()
+ if selfEntry != nil && selfEntry.Planner != nil && !checkInstance(selfEntry.Planner, bk.ResourceID, bk.InstanceID, bk.ExpectedStartDate, bk.ExpectedEndDate) {
+ fmt.Println("ListenNATS: booking conflicts with local planner, discarding")
+ return
+ }
+ bk.IsDraft = true
+ stored, _, err := booking.NewAccessor(adminReq).StoreOne(bk)
+ if err != nil {
+ fmt.Println("ListenNATS: could not store booking:", err)
+ return
+ }
+ storedID := stored.GetID()
+ go refreshSelfPlanner(self.PeerID, adminReq)
+ time.AfterFunc(10*time.Minute, func() { draftTimeout(storedID, tools.BOOKING) })
+}
+
+func handleCreatePurchase(pr *purchase_resource.PurchaseResource, self *peer.Peer, adminReq *tools.APIRequest) {
+ if pr.DestPeerID != self.GetID() {
+ return
+ }
+ // Upsert: if a purchase with this ID already exists, verify auth and update.
+ if existing, _, loadErr := purchase_resource.NewAccessor(adminReq).LoadOne(pr.GetID()); loadErr == nil && existing != nil {
+ prev := existing.(*purchase_resource.PurchaseResource)
+ if prev.SchedulerPeerID != pr.SchedulerPeerID || prev.ExecutionsID != pr.ExecutionsID {
+ fmt.Println("ListenNATS CREATE_RESOURCE purchase upsert: auth mismatch, ignoring", pr.GetID())
+ return
+ }
+ if !prev.IsDrafted() && pr.IsDraft {
+ return
+ }
+ if _, _, err := utils.GenericRawUpdateOne(pr, pr.GetID(), purchase_resource.NewAccessor(adminReq)); err != nil {
+ fmt.Println("ListenNATS CREATE_RESOURCE purchase update failed:", err)
+ return
+ }
+ if !pr.IsDraft {
+ go applyConsidersLocal(pr.GetID(), tools.PURCHASE_RESOURCE)
+ }
+ return
+ }
+ // New purchase: standard create flow.
+ pr.IsDraft = true
+ stored, _, err := purchase_resource.NewAccessor(adminReq).StoreOne(pr)
+ if err != nil {
+ fmt.Println("ListenNATS: could not store purchase:", err)
+ return
+ }
+ storedID := stored.GetID()
+ time.AfterFunc(10*time.Minute, func() { draftTimeout(storedID, tools.PURCHASE_RESOURCE) })
+}
+
+func handleCreateResource(resp tools.NATSResponse) {
+ switch resp.Datatype {
+ case tools.WORKFLOW:
+ wf := workflow.Workflow{}
+ if err := json.Unmarshal(resp.Payload, &wf); err != nil {
+ return
+ }
+ broadcastPlanner(&wf)
+ notifyWorkflowWatchers(wf.GetID())
+ case tools.BOOKING:
+ var bk booking.Booking
+ if err := json.Unmarshal(resp.Payload, &bk); err != nil {
+ return
+ }
+ self, err := oclib.GetMySelf()
+ /*if err != nil || self == nil || bk.DestPeerID != self.GetID() {
+ return
+ }*/
+ adminReq := &tools.APIRequest{Admin: true}
+ _ = err
+ handleCreateBooking(&bk, self, adminReq)
+ case tools.PURCHASE_RESOURCE:
+ var pr purchase_resource.PurchaseResource
+ if err := json.Unmarshal(resp.Payload, &pr); err != nil {
+ return
+ }
+ self, err := oclib.GetMySelf()
+ if err != nil || self == nil {
+ return
+ }
+ adminReq := &tools.APIRequest{Admin: true}
+ handleCreatePurchase(&pr, self, adminReq)
+ }
+}
+
+// confirmResource sets IsDraft=false for a booking or purchase resource.
+// For bookings it also advances State to SCHEDULED and refreshes the local planner.
+func confirmResource(id string, dt tools.DataType) {
+ adminReq := &tools.APIRequest{Admin: true}
+ switch dt {
+ case tools.BOOKING:
+ res, _, err := booking.NewAccessor(adminReq).LoadOne(id)
+ if err != nil || res == nil {
+ fmt.Printf("confirmResource: could not load booking %s: %v\n", id, err)
+ return
+ }
+ bk := res.(*booking.Booking)
+ bk.IsDraft = false
+ bk.State = enum.SCHEDULED
+ if _, _, err := utils.GenericRawUpdateOne(bk, id, booking.NewAccessor(adminReq)); err != nil {
+ fmt.Printf("confirmResource: could not confirm booking %s: %v\n", id, err)
+ return
+ }
+ createNamespace(bk.ExecutionsID) // create Namespace locally
+ self, err := oclib.GetMySelf()
+ if err == nil && self != nil {
+ go refreshSelfPlanner(self.PeerID, adminReq)
+ }
+ case tools.PURCHASE_RESOURCE:
+ res, _, err := purchase_resource.NewAccessor(adminReq).LoadOne(id)
+ if err != nil || res == nil {
+ fmt.Printf("confirmResource: could not load purchase %s: %v\n", id, err)
+ return
+ }
+ pr := res.(*purchase_resource.PurchaseResource)
+ pr.IsDraft = false
+ if _, _, err := utils.GenericRawUpdateOne(pr, id, purchase_resource.NewAccessor(adminReq)); err != nil {
+ fmt.Printf("confirmResource: could not confirm purchase %s: %v\n", id, err)
+ }
+ }
+}
+
+// listenConfirmChannel subscribes to a NATS subject and calls confirmResource
+// for each message received. The message body is expected to be the plain
+// resource ID (UTF-8 string).
+func listenConfirmChannel(nc *nats.Conn, subject string, dt tools.DataType, wg *sync.WaitGroup) {
+ defer wg.Done()
+ ch := make(chan *nats.Msg, 64)
+ sub, err := nc.ChanSubscribe(subject, ch)
+ if err != nil {
+ fmt.Printf("listenConfirmChannel: could not subscribe to %s: %v\n", subject, err)
+ return
+ }
+ defer sub.Unsubscribe()
+ for msg := range ch {
+ confirmResource(string(msg.Data), dt)
+ }
+}
diff --git a/infrastructure/planner.go b/infrastructure/planner.go
new file mode 100644
index 0000000..18d5457
--- /dev/null
+++ b/infrastructure/planner.go
@@ -0,0 +1,353 @@
+package infrastructure
+
+import (
+ "encoding/json"
+ "fmt"
+ "slices"
+ "sync"
+ "time"
+
+ oclib "cloud.o-forge.io/core/oc-lib"
+ "cloud.o-forge.io/core/oc-lib/models/booking/planner"
+ "cloud.o-forge.io/core/oc-lib/models/workflow"
+ "cloud.o-forge.io/core/oc-lib/models/workflow/graph"
+ "cloud.o-forge.io/core/oc-lib/tools"
+)
+
+const plannerTTL = 24 * time.Hour
+
+// ---------------------------------------------------------------------------
+// Planner cache — protected by plannerMu
+// ---------------------------------------------------------------------------
+
+// plannerEntry wraps a planner snapshot with refresh-ownership tracking.
+// At most one check session may be the "refresh owner" of a given peer's
+// planner at a time: it emits PB_PLANNER to request a fresh snapshot from
+// oc-discovery and, on close (clean or forced), emits PB_CLOSE_PLANNER to
+// release the stream. Any subsequent session that needs the same peer's
+// planner will see Refreshing=true and skip the duplicate request.
+type plannerEntry struct {
+ Planner *planner.Planner
+ Refreshing bool // true while a PB_PLANNER request is in flight
+ RefreshOwner string // session UUID that initiated the current refresh
+}
+
+var plannerMu sync.RWMutex
+var PlannerCache = map[string]*plannerEntry{}
+var plannerAddedAt = map[string]time.Time{} // peerID → first-seen timestamp
+
+// ---------------------------------------------------------------------------
+// Subscriber registries — one keyed by peerID, one by workflowID
+// ---------------------------------------------------------------------------
+
+var subsMu sync.RWMutex
+var plannerSubs = map[string][]chan string{} // peerID → channels (deliver peerID)
+var workflowSubs = map[string][]chan struct{}{} // workflowID → notification channels
+
+// subscribePlanners registers interest in planner changes for the given peer IDs.
+// The returned channel receives the peerID string (non-blocking) each time any
+// of those planners is updated. Call cancel to unregister.
+func subscribePlanners(peerIDs []string) (<-chan string, func()) {
+ ch := make(chan string, 1)
+ subsMu.Lock()
+ for _, k := range peerIDs {
+ plannerSubs[k] = append(plannerSubs[k], ch)
+ }
+ subsMu.Unlock()
+ cancel := func() {
+ subsMu.Lock()
+ for _, k := range peerIDs {
+ subs := plannerSubs[k]
+ for i, s := range subs {
+ if s == ch {
+ plannerSubs[k] = append(subs[:i], subs[i+1:]...)
+ break
+ }
+ }
+ }
+ subsMu.Unlock()
+ }
+ return ch, cancel
+}
+
+// SubscribePlannerUpdates registers interest in planner changes for the given
+// peer IDs. The returned channel receives the peerID string (non-blocking) each
+// time any of those planners is updated. Call cancel to unregister.
+func SubscribePlannerUpdates(peerIDs []string) (<-chan string, func()) {
+ return subscribePlanners(peerIDs)
+}
+
+// SubscribeWorkflowUpdates registers interest in workflow modifications for the
+// given workflow ID. The returned channel is signalled when the workflow changes
+// (peer list may have grown or shrunk). Call cancel to unregister.
+func SubscribeWorkflowUpdates(wfID string) (<-chan struct{}, func()) {
+ ch, cancel := subscribe(&subsMu, workflowSubs, []string{wfID})
+ return ch, cancel
+}
+
+// subscribe registers ch under each key in the given registry and returns a cancel func that removes it.
+func subscribe(mu *sync.RWMutex, registry map[string][]chan struct{}, keys []string) (<-chan struct{}, func()) {
+ ch := make(chan struct{}, 1)
+ mu.Lock()
+ for _, k := range keys {
+ registry[k] = append(registry[k], ch)
+ }
+ mu.Unlock()
+ cancel := func() {
+ mu.Lock()
+ for _, k := range keys {
+ subs := registry[k]
+ for i, s := range subs {
+ if s == ch {
+ registry[k] = append(subs[:i], subs[i+1:]...)
+ break
+ }
+ }
+ }
+ mu.Unlock()
+ }
+ return ch, cancel
+}
+
+func notifyPlannerWatchers(peerID string) {
+ subsMu.RLock()
+ subs := plannerSubs[peerID]
+ subsMu.RUnlock()
+ for _, ch := range subs {
+ select {
+ case ch <- peerID:
+ default:
+ }
+ }
+}
+
+func notifyWorkflowWatchers(wfID string) {
+ notify(&subsMu, workflowSubs, wfID)
+}
+
+func notify(mu *sync.RWMutex, registry map[string][]chan struct{}, key string) {
+ mu.RLock()
+ subs := registry[key]
+ mu.RUnlock()
+ for _, ch := range subs {
+ select {
+ case ch <- struct{}{}:
+ default:
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Cache helpers
+// ---------------------------------------------------------------------------
+
+// storePlanner inserts or updates the planner snapshot for peerID.
+// On first insertion it schedules an automatic eviction after plannerTTL.
+// Existing refresh-ownership state (Refreshing / RefreshOwner) is preserved
+// so that an in-flight request is not inadvertently reset.
+// All subscribers interested in this peer are notified.
+func storePlanner(peerID string, p *planner.Planner) {
+ plannerMu.Lock()
+ entry := PlannerCache[peerID]
+ isNew := entry == nil
+ if isNew {
+ entry = &plannerEntry{}
+ PlannerCache[peerID] = entry
+ plannerAddedAt[peerID] = time.Now()
+ go evictAfter(peerID, plannerTTL)
+ }
+ entry.Planner = p
+ plannerMu.Unlock()
+ notifyPlannerWatchers(peerID)
+}
+
+// evictAfter waits ttl from first insertion then deletes the cache entry and
+// emits PB_CLOSE_PLANNER so oc-discovery stops streaming for this peer.
+// This is the only path that actually removes an entry from PlannerCache;
+// session close (ReleaseRefreshOwnership) only resets ownership state.
+func evictAfter(peerID string, ttl time.Duration) {
+ time.Sleep(ttl)
+ plannerMu.Lock()
+ _, exists := PlannerCache[peerID]
+ if exists {
+ delete(PlannerCache, peerID)
+ delete(plannerAddedAt, peerID)
+ }
+ plannerMu.Unlock()
+ if exists {
+ EmitNATS(peerID, tools.PropalgationMessage{Action: tools.PB_CLOSE_PLANNER})
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Planner refresh / broadcast
+// ---------------------------------------------------------------------------
+
+// RequestPlannerRefresh asks oc-discovery for a fresh planner snapshot for
+// each peer in peerIDs. Only the first caller (identified by executionsID) to
+// request a given peer becomes its "refresh owner": later callers see
+// Refreshing=true and skip the duplicate PB_PLANNER emission. Returns the
+// subset of peerIDs this executionsID claimed ownership of (release on close).
+func RequestPlannerRefresh(peerIDs []string, executionsID string) []string {
+ var owned []string
+ for _, peerID := range peerIDs {
+ plannerMu.Lock()
+ entry := PlannerCache[peerID]
+ if entry == nil {
+ entry = &plannerEntry{}
+ PlannerCache[peerID] = entry
+ plannerAddedAt[peerID] = time.Now()
+ go evictAfter(peerID, plannerTTL)
+ }
+ shouldRequest := !entry.Refreshing
+ if shouldRequest {
+ entry.Refreshing = true
+ entry.RefreshOwner = executionsID
+ }
+ plannerMu.Unlock()
+ if shouldRequest {
+ owned = append(owned, peerID)
+ if p, err := oclib.GetMySelf(); err == nil && p != nil && p.PeerID == peerID {
+ // Self peer: generate and cache the planner directly without
+ // going through NATS / oc-discovery.
+ go refreshSelfPlanner(peerID, &tools.APIRequest{Admin: true})
+ } else {
+ payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
+ fmt.Println("PB_PLANNER", peerID)
+ EmitNATS(peerID, tools.PropalgationMessage{
+ Action: tools.PB_PLANNER,
+ Payload: payload,
+ })
+ }
+ }
+ }
+ return owned
+}
+
+// ReleaseRefreshOwnership is called when a check session closes (clean or
+// forced). For each peer whose RefreshOwner matches executionsID, it resets
+// the refresh state and emits PB_CLOSE_PLANNER so oc-discovery stops the
+// planner stream. The cached planner itself stays until TTL eviction.
+func ReleaseRefreshOwnership(peerIDs []string, executionsID string) {
+ for _, peerID := range peerIDs {
+ plannerMu.Lock()
+ if entry := PlannerCache[peerID]; entry != nil && entry.RefreshOwner == executionsID {
+ entry.Refreshing = false
+ entry.RefreshOwner = ""
+ }
+ plannerMu.Unlock()
+ payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
+ EmitNATS(peerID, tools.PropalgationMessage{
+ Action: tools.PB_CLOSE_PLANNER,
+ Payload: payload,
+ })
+ }
+}
+
+// broadcastPlanner iterates the storage and compute peers of the given workflow
+// and, for each peer not yet in the cache, emits a PB_PLANNER propagation so
+// downstream consumers (oc-discovery, other schedulers) refresh their state.
+func broadcastPlanner(wf *workflow.Workflow) {
+ if wf.Graph == nil {
+ return
+ }
+ items := []graph.GraphItem{}
+ items = append(items, wf.GetGraphItems(wf.Graph.IsStorage)...)
+ items = append(items, wf.GetGraphItems(wf.Graph.IsCompute)...)
+
+ seen := []string{}
+ for _, item := range items {
+ i := item
+ _, res := i.GetResource()
+ if res == nil {
+ continue
+ }
+ creatorID := res.GetCreatorID()
+ if slices.Contains(seen, creatorID) {
+ continue
+ }
+
+ data := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).LoadOne(creatorID)
+ p := data.ToPeer()
+ if p == nil {
+ continue
+ }
+
+ plannerMu.RLock()
+ cached := PlannerCache[p.PeerID]
+ plannerMu.RUnlock()
+
+ // Only request if no snapshot and no refresh already in flight.
+ if cached == nil || (cached.Planner == nil && !cached.Refreshing) {
+ payload, err := json.Marshal(map[string]interface{}{"peer_id": p.PeerID})
+ if err != nil {
+ continue
+ }
+ seen = append(seen, creatorID)
+ EmitNATS(p.PeerID, tools.PropalgationMessage{
+ Action: tools.PB_PLANNER,
+ Payload: payload,
+ })
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Self-planner initialisation
+// ---------------------------------------------------------------------------
+
+// InitSelfPlanner bootstraps our own planner entry at startup.
+// It waits (with 15-second retries) for our peer record to be present in the
+// database before generating the first planner snapshot and broadcasting it
+// on PB_PLANNER. This handles the race between oc-scheduler starting before
+// oc-peer has fully registered our node.
+func InitSelfPlanner() {
+ for {
+ self, err := oclib.GetMySelf()
+ if err != nil || self == nil {
+ fmt.Println("InitSelfPlanner: self peer not found yet, retrying in 15s...")
+ time.Sleep(15 * time.Second)
+ continue
+ }
+ refreshSelfPlanner(self.PeerID, &tools.APIRequest{Admin: true})
+ return
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Self-planner refresh
+// ---------------------------------------------------------------------------
+
+// refreshSelfPlanner regenerates the local planner from the current state of
+// the booking DB, stores it in PlannerCache under our own node UUID, and
+// broadcasts it on PROPALGATION_EVENT / PB_PLANNER so all listeners (including
+// oc-discovery) are kept in sync.
+//
+// It should be called whenever a booking for our own peer is created, whether
+// by direct DB insertion (self-peer routing) or upon receiving a CREATE_RESOURCE
+// BOOKING message from oc-discovery.
+func refreshSelfPlanner(peerID string, request *tools.APIRequest) {
+ p, err := planner.GenerateShallow(request)
+ if err != nil {
+ fmt.Println("refreshSelfPlanner: could not generate planner:", err)
+ return
+ }
+
+ // Update the local cache and notify any waiting CheckStream goroutines.
+ storePlanner(peerID, p)
+
+ // Broadcast the updated planner so remote peers (and oc-discovery) can
+ // refresh their view of our availability.
+ type plannerWithPeer struct {
+ PeerID string `json:"peer_id"`
+ *planner.Planner
+ }
+ plannerPayload, err := json.Marshal(plannerWithPeer{PeerID: peerID, Planner: p})
+ if err != nil {
+ return
+ }
+ EmitNATS(peerID, tools.PropalgationMessage{
+ Action: tools.PB_PLANNER,
+ Payload: plannerPayload,
+ })
+}
diff --git a/infrastructure/scheduler.go b/infrastructure/scheduler.go
index 4f9485d..695dd81 100644
--- a/infrastructure/scheduler.go
+++ b/infrastructure/scheduler.go
@@ -4,18 +4,17 @@ import (
"encoding/json"
"errors"
"fmt"
+ "oc-scheduler/infrastructure/scheduling"
"strings"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/bill"
"cloud.o-forge.io/core/oc-lib/models/booking"
- "cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/common/enum"
"cloud.o-forge.io/core/oc-lib/models/common/pricing"
"cloud.o-forge.io/core/oc-lib/models/order"
"cloud.o-forge.io/core/oc-lib/models/peer"
- "cloud.o-forge.io/core/oc-lib/models/resources"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/models/workflow"
@@ -48,6 +47,9 @@ type WorkflowSchedule struct {
SelectedStrategies workflow.ConfigItem `json:"selected_strategies"`
SelectedBillingStrategy pricing.BillingStrategy `json:"selected_billing_strategy"`
+
+ // Confirm, when true, triggers Schedule() to confirm the drafts held by this session.
+ Confirm bool `json:"confirm,omitempty"`
}
// TODO PREEMPTION !
@@ -67,7 +69,7 @@ ne pourra se lancé que SI il n'existe pas d'exécution se lançant durant la p
func NewScheduler(mode int, start string, end string, durationInS float64, cron string) *WorkflowSchedule {
ws := &WorkflowSchedule{
UUID: uuid.New().String(),
- Start: time.Now(),
+ Start: time.Now().Add(asapBuffer),
BookingMode: booking.BookingMode(mode),
DurationS: durationInS,
Cron: cron,
@@ -84,21 +86,18 @@ func NewScheduler(mode int, start string, end string, durationInS float64, cron
return ws
}
-func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest) (bool, *workflow.Workflow, []*workflow_execution.WorkflowExecution, []*purchase_resource.PurchaseResource, []*booking.Booking, error) {
- if request.Caller == nil && request.Caller.URLS == nil && request.Caller.URLS[tools.BOOKING] == nil || request.Caller.URLS[tools.BOOKING][tools.GET] == "" {
- return false, nil, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, errors.New("no caller defined")
- }
+func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest) (bool, *workflow.Workflow, []*workflow_execution.WorkflowExecution, []scheduling.SchedulerObject, []scheduling.SchedulerObject, error) {
access := workflow.NewAccessor(request)
res, code, err := access.LoadOne(wfID)
if code != 200 {
- return false, nil, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, errors.New("could not load the workflow with id: " + err.Error())
+ return false, nil, []*workflow_execution.WorkflowExecution{}, []scheduling.SchedulerObject{}, []scheduling.SchedulerObject{}, errors.New("could not load the workflow with id: " + err.Error())
}
wf := res.(*workflow.Workflow)
isPreemptible, longest, priceds, wf, err := wf.Planify(ws.Start, ws.End,
ws.SelectedInstances, ws.SelectedPartnerships, ws.SelectedBuyings, ws.SelectedStrategies,
int(ws.BookingMode), request)
if err != nil {
- return false, wf, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, err
+ return false, wf, []*workflow_execution.WorkflowExecution{}, []scheduling.SchedulerObject{}, []scheduling.SchedulerObject{}, err
}
ws.DurationS = longest
ws.Message = "We estimate that the workflow will start at " + ws.Start.String() + " and last " + fmt.Sprintf("%v", ws.DurationS) + " seconds."
@@ -107,101 +106,94 @@ func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest
}
execs, err := ws.GetExecutions(wf, isPreemptible)
if err != nil {
- return false, wf, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, err
+ return false, wf, []*workflow_execution.WorkflowExecution{}, []scheduling.SchedulerObject{}, []scheduling.SchedulerObject{}, err
}
- purchased := []*purchase_resource.PurchaseResource{}
- bookings := []*booking.Booking{}
+ purchased := []scheduling.SchedulerObject{}
+ bookings := []scheduling.SchedulerObject{}
for _, exec := range execs {
- purchased = append(purchased, exec.Buy(ws.SelectedBillingStrategy, ws.UUID, wfID, priceds)...)
- bookings = append(bookings, exec.Book(ws.UUID, wfID, priceds)...)
+ for _, obj := range exec.Buy(ws.SelectedBillingStrategy, ws.UUID, wfID, priceds) {
+ purchased = append(purchased, scheduling.ToSchedulerObject(tools.PURCHASE_RESOURCE, obj))
+ }
+ for _, obj := range exec.Book(ws.UUID, wfID, priceds) {
+ bookings = append(bookings, scheduling.ToSchedulerObject(tools.BOOKING, obj))
+ }
}
return true, wf, execs, purchased, bookings, nil
}
-func (ws *WorkflowSchedule) GenerateOrder(purchases []*purchase_resource.PurchaseResource, bookings []*booking.Booking, request *tools.APIRequest) error {
+// GenerateOrder creates a draft order (+ draft bill) for the given purchases and bookings.
+// Returns the created order ID and any error.
+func (ws *WorkflowSchedule) GenerateOrder(purchases []scheduling.SchedulerObject, bookings []scheduling.SchedulerObject, executionsID string, request *tools.APIRequest) (string, error) {
newOrder := &order.Order{
AbstractObject: utils.AbstractObject{
Name: "order_" + request.PeerID + "_" + time.Now().UTC().Format("2006-01-02T15:04:05"),
IsDraft: true,
},
- ExecutionsID: ws.UUID,
- Purchases: purchases,
- Bookings: bookings,
+ ExecutionsID: executionsID,
+ Purchases: []*purchase_resource.PurchaseResource{},
+ Bookings: []*booking.Booking{},
Status: enum.PENDING,
}
- if res, _, err := order.NewAccessor(request).StoreOne(newOrder); err == nil {
- if _, err := bill.DraftFirstBill(res.(*order.Order), request); err != nil {
- return err
- }
- return nil
- } else {
- return err
+ for _, purch := range purchases {
+ newOrder.Purchases = append(
+ newOrder.Purchases, scheduling.FromSchedulerObject(tools.PURCHASE_RESOURCE, purch).(*purchase_resource.PurchaseResource))
}
+ for _, b := range bookings {
+ newOrder.Bookings = append(
+ newOrder.Bookings, scheduling.FromSchedulerObject(tools.BOOKING, b).(*booking.Booking))
+ }
+ res, _, err := order.NewAccessor(request).StoreOne(newOrder)
+ if err != nil {
+ return "", err
+ }
+ if _, err := bill.DraftFirstBill(res.(*order.Order), request); err != nil {
+ return res.GetID(), err
+ }
+ return res.GetID(), nil
}
func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*WorkflowSchedule, *workflow.Workflow, []*workflow_execution.WorkflowExecution, error) {
if request == nil {
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no request found")
}
- c := request.Caller
- if c == nil || c.URLS == nil || c.URLS[tools.BOOKING] == nil {
- return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no caller defined")
- }
- methods := c.URLS[tools.BOOKING]
- if _, ok := methods[tools.GET]; !ok {
- return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no path found")
- }
- ok, wf, executions, purchases, bookings, err := ws.GetBuyAndBook(wfID, request)
- ws.WorkflowExecution = executions
- if !ok || err != nil {
- return ws, nil, executions, errors.New("could not book the workflow : " + fmt.Sprintf("%v", err))
- }
- ws.Workflow = wf
-
- // Resolve our own peer MongoDB-ID once; used to decide local vs NATS routing.
selfID, _ := oclib.GetMySelf()
- errCh := make(chan error, len(purchases))
- for _, purchase := range purchases {
- purchase.IsDraft = true
- go propagateResource(purchase, purchase.DestPeerID, tools.PURCHASE_RESOURCE, selfID, request, errCh)
- }
- for i := 0; i < len(purchases); i++ {
- if err := <-errCh; err != nil {
- return ws, wf, executions, errors.New("could not propagate purchase: " + fmt.Sprintf("%v", err))
+ // If the client provides a scheduling_id from a Check session, confirm the
+ // pre-created drafts (bookings/purchases). Executions already exist as drafts
+ // and will be confirmed later by the considers mechanism.
+ if ws.UUID != "" {
+ adminReq := &tools.APIRequest{Admin: true}
+
+ // Obsolescence check: abort if any session execution's start date has passed.
+ executions := loadSessionExecs(ws.UUID)
+ for _, exec := range executions {
+ if !exec.ExecDate.IsZero() && exec.ExecDate.Before(time.Now()) {
+ return ws, nil, nil, fmt.Errorf("execution %s is obsolete (start date in the past)", exec.GetID())
+ }
}
- }
- errCh = make(chan error, len(bookings))
- for _, bk := range bookings {
- bk.IsDraft = true
- go propagateResource(bk, bk.DestPeerID, tools.BOOKING, selfID, request, errCh)
- }
- for i := 0; i < len(bookings); i++ {
- if err := <-errCh; err != nil {
- return ws, wf, executions, errors.New("could not propagate booking: " + fmt.Sprintf("%v", err))
+ if err := ConfirmSession(ws.UUID, selfID, request); err != nil {
+ return ws, nil, []*workflow_execution.WorkflowExecution{}, fmt.Errorf("confirm session failed: %w", err)
}
- }
- if err := ws.GenerateOrder(purchases, bookings, request); err != nil {
- return ws, wf, executions, err
- }
-
- fmt.Println("Schedules")
- for _, exec := range executions {
- err := exec.PurgeDraft(request)
- if err != nil {
- return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("purge draft" + fmt.Sprintf("%v", err))
+ for _, exec := range executions {
+ go WatchExecDeadline(exec.GetID(), exec.ExecDate, selfID, request)
}
- exec.StoreDraftDefault()
- utils.GenericStoreOne(exec, workflow_execution.NewAccessor(request))
- go EmitConsidersExecution(exec, wf)
+
+ obj, _, _ := workflow.NewAccessor(request).LoadOne(wfID)
+ if obj == nil {
+ return ws, nil, executions, nil
+ }
+ wf := obj.(*workflow.Workflow)
+ ws.Workflow = wf
+ ws.WorkflowExecution = executions
+ wf.GetAccessor(adminReq).UpdateOne(wf.Serialize(wf), wf.GetID())
+ return ws, wf, executions, nil
}
- fmt.Println("Schedules")
- wf.GetAccessor(&tools.APIRequest{Admin: true}).UpdateOne(wf.Serialize(wf), wf.GetID())
-
- return ws, wf, executions, nil
+ // Schedule must be called from a Check session (ws.UUID set above).
+ // Direct scheduling without a prior Check session is not supported.
+ return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no scheduling session: use the Check stream first")
}
// propagateResource routes a purchase or booking to its destination:
@@ -210,14 +202,12 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
// - Otherwise a NATS CREATE_RESOURCE message is emitted so the destination
// peer can process it asynchronously.
//
-// The caller is responsible for setting obj.IsDraft = true before calling.
+// The caller is responsible for setting obj.IsDraft before calling.
func propagateResource(obj utils.DBObject, destPeerID string, dt tools.DataType, selfMongoID *peer.Peer, request *tools.APIRequest, errCh chan error) {
- if selfMongoID == nil {
- return
- } // booking or purchase
if destPeerID == selfMongoID.GetID() {
- if _, _, err := obj.GetAccessor(request).StoreOne(obj); err != nil {
- errCh <- fmt.Errorf("could not store %s locally: %w", dt.String(), err)
+ stored := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).StoreOne(obj.Serialize(obj))
+ if stored.Err != "" || stored.Data == nil {
+ errCh <- fmt.Errorf("could not store %s locally: %s", dt.String(), stored.Err)
return
}
// The planner tracks booking time-slots only; purchases do not affect it.
@@ -227,17 +217,32 @@ func propagateResource(obj utils.DBObject, destPeerID string, dt tools.DataType,
errCh <- nil
return
}
- payload, err := json.Marshal(obj)
+ m := obj.Serialize(obj)
+ if m["dest_peer_id"] != nil {
+ if data := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).LoadOne(fmt.Sprintf("%v", m["dest_peer_id"])); data.Data != nil {
+ m["peer_id"] = data.Data.(*peer.Peer).PeerID
+ }
+ } else {
+ fmt.Println("NO DEST ID")
+ return
+ }
+ payload, err := json.Marshal(m)
if err != nil {
errCh <- fmt.Errorf("could not serialize %s: %w", dt.String(), err)
return
}
- tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
- FromApp: "oc-scheduler",
- Datatype: dt,
- Method: int(tools.CREATE_RESOURCE),
+ if b, err := json.Marshal(&tools.PropalgationMessage{
+ DataType: dt.EnumIndex(),
+ Action: tools.PB_CREATE,
Payload: payload,
- })
+ }); err == nil {
+ tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
+ FromApp: "oc-scheduler",
+ Datatype: dt,
+ Method: int(tools.PROPALGATION_EVENT),
+ Payload: b,
+ })
+ }
errCh <- nil
}
@@ -335,303 +340,3 @@ type Schedule struct {
* TODO : LARGEST GRAIN PLANIFYING THE WORKFLOW WHEN OPTION IS SET
* SET PROTECTION BORDER TIME
*/
-
-// ---------------------------------------------------------------------------
-// Slot availability check
-// ---------------------------------------------------------------------------
-
-const (
- checkWindowHours = 5 // how far ahead to scan for a free slot (hours)
- checkStepMin = 15 // time increment per scan step (minutes)
-)
-
-// CheckResult holds the outcome of a slot availability check.
-type CheckResult struct {
- Available bool `json:"available"`
- Start time.Time `json:"start"`
- End *time.Time `json:"end,omitempty"`
- // NextSlot is the nearest free slot found within checkWindowHours when
- // the requested slot is unavailable, or the preferred (conflict-free) slot
- // when running in preemption mode.
- NextSlot *time.Time `json:"next_slot,omitempty"`
- Warnings []string `json:"warnings,omitempty"`
- // Preemptible is true when the check was run in preemption mode.
- Preemptible bool `json:"preemptible,omitempty"`
-}
-
-// bookingResource is the minimum info needed to verify a resource against the
-// planner cache.
-type bookingResource struct {
- id string
- peerID string
- instanceID string // resolved from WorkflowSchedule.SelectedInstances
-}
-
-// Check verifies that all booking-relevant resources (storage and compute) of
-// the given workflow have capacity for the requested time slot.
-//
-// - asap=true → ignore ws.Start, begin searching from time.Now()
-// - preemption → always return Available=true but populate Warnings with
-// conflicts and NextSlot with the nearest conflict-free alternative
-func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) {
- // 1. Load workflow
- obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
- if code != 200 || err != nil {
- msg := "could not load workflow " + wfID
- if err != nil {
- msg += ": " + err.Error()
- }
- return nil, errors.New(msg)
- }
- wf := obj.(*workflow.Workflow)
-
- // 2. Resolve start
- start := ws.Start
- if asap || start.IsZero() {
- start = time.Now()
- }
-
- // 3. Resolve end – use explicit end/duration or estimate via Planify
- end := ws.End
- if end == nil {
- if ws.DurationS > 0 {
- e := start.Add(time.Duration(ws.DurationS * float64(time.Second)))
- end = &e
- } else {
- _, longest, _, _, planErr := wf.Planify(
- start, nil,
- ws.SelectedInstances, ws.SelectedPartnerships,
- ws.SelectedBuyings, ws.SelectedStrategies,
- int(ws.BookingMode), request,
- )
- if planErr == nil && longest > 0 {
- e := start.Add(time.Duration(longest) * time.Second)
- end = &e
- }
- }
- }
-
- // 4. Extract booking-relevant (storage + compute) resources from the graph,
- // resolving the selected instance for each resource.
- checkables := collectBookingResources(wf, ws.SelectedInstances)
- fmt.Println(checkables)
- // 5. Check every resource against its peer's planner
- unavailable, warnings := checkResourceAvailability(checkables, start, end)
- fmt.Println(unavailable, warnings)
- result := &CheckResult{
- Start: start,
- End: end,
- Warnings: warnings,
- }
-
- // 6. Preemption mode: mark as schedulable regardless of conflicts, but
- // surface warnings and the nearest conflict-free alternative.
- if preemption {
- result.Available = true
- result.Preemptible = true
- if len(unavailable) > 0 {
- result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
- }
- return result, nil
- }
-
- // 7. All resources are free
- if len(unavailable) == 0 {
- result.Available = true
- return result, nil
- }
-
- // 8. Slot unavailable – locate the nearest free slot within the window
- result.Available = false
- result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
- return result, nil
-}
-
-// collectBookingResources returns unique storage and compute resources from the
-// workflow graph. For each resource the selected instance ID is resolved from
-// selectedInstances (the scheduler's SelectedInstances ConfigItem) so the planner
-// check targets the exact instance chosen by the user.
-func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) []bookingResource {
- if wf.Graph == nil {
- return nil
- }
- seen := map[string]bool{}
- var result []bookingResource
-
- resolveInstanceID := func(res interface {
- GetID() string
- GetCreatorID() string
- }) string {
- idx := selectedInstances.Get(res.GetID())
- switch r := res.(type) {
- case *resources.StorageResource:
- if inst := r.GetSelectedInstance(idx); inst != nil {
- return inst.GetID()
- }
- case *resources.ComputeResource:
- if inst := r.GetSelectedInstance(idx); inst != nil {
- return inst.GetID()
- }
- }
- return ""
- }
-
- for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
- i := item
- _, res := i.GetResource()
- if res == nil {
- continue
- }
- id, peerID := res.GetID(), res.GetCreatorID()
- if peerID == "" || seen[id] {
- continue
- }
- seen[id] = true
- result = append(result, bookingResource{
- id: id,
- peerID: peerID,
- instanceID: resolveInstanceID(res),
- })
- }
-
- for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
- i := item
- _, res := i.GetResource()
- if res == nil {
- continue
- }
- id, peerID := res.GetID(), res.GetCreatorID()
- if peerID == "" || seen[id] {
- continue
- }
- seen[id] = true
- result = append(result, bookingResource{
- id: id,
- peerID: peerID,
- instanceID: resolveInstanceID(res),
- })
- }
-
- return result
-}
-
-// checkResourceAvailability returns the IDs of unavailable resources and
-// human-readable warning messages.
-func checkResourceAvailability(res []bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) {
- for _, r := range res {
- plannerMu.RLock()
- entry := PlannerCache[r.peerID]
- plannerMu.RUnlock()
- if entry == nil || entry.Planner == nil {
- warnings = append(warnings, fmt.Sprintf(
- "peer %s planner not in cache for resource %s – assuming available", r.peerID, r.id))
- continue
- }
- if !checkInstance(entry.Planner, r.id, r.instanceID, start, end) {
- unavailable = append(unavailable, r.id)
- warnings = append(warnings, fmt.Sprintf(
- "resource %s is not available in [%s – %s]",
- r.id, start.Format(time.RFC3339), formatOptTime(end)))
- }
- }
- return
-}
-
-// checkInstance checks availability for the specific instance resolved by the
-// scheduler. When instanceID is empty (no instance selected / none resolvable),
-// it falls back to checking all instances known in the planner and returns true
-// if any one has remaining capacity. Returns true when no capacity is recorded.
-func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool {
- if instanceID != "" {
- return p.Check(resourceID, instanceID, nil, start, end)
- }
- // Fallback: accept if any known instance has free capacity
- caps, ok := p.Capacities[resourceID]
- if !ok || len(caps) == 0 {
- return true // no recorded usage → assume free
- }
- for id := range caps {
- if p.Check(resourceID, id, nil, start, end) {
- return true
- }
- }
- return false
-}
-
-// findNextSlot scans forward from 'from' in checkStepMin increments for up to
-// windowH hours and returns the first candidate start time at which all
-// resources are simultaneously free.
-func findNextSlot(resources []bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time {
- duration := time.Hour
- if originalEnd != nil {
- if d := originalEnd.Sub(from); d > 0 {
- duration = d
- }
- }
- step := time.Duration(checkStepMin) * time.Minute
- limit := from.Add(time.Duration(windowH) * time.Hour)
- for t := from.Add(step); t.Before(limit); t = t.Add(step) {
- e := t.Add(duration)
- if unavail, _ := checkResourceAvailability(resources, t, &e); len(unavail) == 0 {
- return &t
- }
- }
- return nil
-}
-
-func formatOptTime(t *time.Time) string {
- if t == nil {
- return "open"
- }
- return t.Format(time.RFC3339)
-}
-
-// GetWorkflowPeerIDs loads the workflow and returns the deduplicated list of
-// creator peer IDs for all its storage and compute resources.
-// These are the peers whose planners must be watched by a check stream.
-func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) {
- obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
- if code != 200 || err != nil {
- msg := "could not load workflow " + wfID
- if err != nil {
- msg += ": " + err.Error()
- }
- return nil, errors.New(msg)
- }
- wf := obj.(*workflow.Workflow)
- if wf.Graph == nil {
- return nil, nil
- }
- seen := map[string]bool{}
- var peerIDs []string
- for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
- i := item
- _, res := i.GetResource()
- if res == nil {
- continue
- }
- if id := res.GetCreatorID(); id != "" && !seen[id] {
- seen[id] = true
- peerIDs = append(peerIDs, id)
- }
- }
- for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
- i := item
- _, res := i.GetResource()
- if res == nil {
- continue
- }
- if id := res.GetCreatorID(); id != "" && !seen[id] {
- seen[id] = true
- peerIDs = append(peerIDs, id)
- }
- }
- realPeersID := []string{}
- access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
- for _, id := range peerIDs {
- if data := access.LoadOne(id); data.Data != nil {
- realPeersID = append(realPeersID, data.ToPeer().PeerID)
- }
- }
- return realPeersID, nil
-}
diff --git a/infrastructure/scheduling/objects.go b/infrastructure/scheduling/objects.go
new file mode 100644
index 0000000..cb03d4d
--- /dev/null
+++ b/infrastructure/scheduling/objects.go
@@ -0,0 +1,142 @@
+package scheduling
+
+import (
+ "encoding/json"
+
+ "cloud.o-forge.io/core/oc-lib/models/booking"
+ "cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
+ "cloud.o-forge.io/core/oc-lib/models/utils"
+ "cloud.o-forge.io/core/oc-lib/tools"
+)
+
// SchedulerObject is the common abstraction the scheduler uses to handle
// bookings and purchases uniformly: both are DB objects carrying a draft
// flag, a scheduler peer ID, a session (executions) ID and a destination peer.
type SchedulerObject interface {
	utils.DBObject
	SetIsDraft(bool)
	// GetKey identifies the reserved slot as resource/instance/datatype.
	GetKey() string
	SetSchedulerPeerID(peerID string)
	SetExecutionsID(ei string)
	GetDestPeer() string
	GetPeerSession() string
	// GetExecutionsId returns the scheduling-session ID, while GetExecutionId
	// returns the single execution the object belongs to — note the
	// one-letter difference between the two method names.
	GetExecutionsId() string
	GetExecutionId() string
}

// ScheduledPurchase wraps an oc-lib purchase so it satisfies SchedulerObject.
type ScheduledPurchase struct {
	purchase_resource.PurchaseResource
}

// ScheduledBooking wraps an oc-lib booking so it satisfies SchedulerObject.
type ScheduledBooking struct {
	booking.Booking
}
+
+func FromSchedulerDBObject(dt tools.DataType, obj SchedulerObject) utils.DBObject {
+ switch dt {
+ case tools.BOOKING:
+ o := &booking.Booking{}
+ b, _ := json.Marshal(obj)
+ json.Unmarshal(b, &o)
+ return o
+ case tools.PURCHASE_RESOURCE:
+ o := &purchase_resource.PurchaseResource{}
+ b, _ := json.Marshal(obj)
+ json.Unmarshal(b, &o)
+ return o
+ }
+ return nil
+}
+
+func FromSchedulerObject(dt tools.DataType, obj SchedulerObject) utils.ShallowDBObject {
+ switch dt {
+ case tools.BOOKING:
+ o := &booking.Booking{}
+ b, _ := json.Marshal(obj)
+ json.Unmarshal(b, &o)
+ return o
+ case tools.PURCHASE_RESOURCE:
+ o := &purchase_resource.PurchaseResource{}
+ b, _ := json.Marshal(obj)
+ json.Unmarshal(b, &o)
+ return o
+ }
+ return nil
+}
+
+func ToSchedulerObject(dt tools.DataType, obj utils.ShallowDBObject) SchedulerObject {
+ switch dt {
+ case tools.BOOKING:
+ o := &ScheduledBooking{}
+ b, _ := json.Marshal(obj)
+ json.Unmarshal(b, &o)
+ return o
+ case tools.PURCHASE_RESOURCE:
+ o := &ScheduledPurchase{}
+ b, _ := json.Marshal(obj)
+ json.Unmarshal(b, &o)
+ return o
+ }
+ return nil
+}
+
// GetExecutionId returns the ID of the single workflow execution this
// booking belongs to.
func (b *ScheduledBooking) GetExecutionId() string {
	return b.ExecutionID
}

// GetExecutionId returns the ID of the single workflow execution this
// purchase belongs to.
func (b *ScheduledPurchase) GetExecutionId() string {
	return b.ExecutionID
}

// GetExecutionsId returns the scheduling-session ID this booking was
// created under.
func (b *ScheduledBooking) GetExecutionsId() string {
	return b.ExecutionsID
}

// GetExecutionsId returns the scheduling-session ID this purchase was
// created under.
func (b *ScheduledPurchase) GetExecutionsId() string {
	return b.ExecutionsID
}

// GetPeerSession returns the peer ID of the scheduler that owns the session.
func (b *ScheduledBooking) GetPeerSession() string {
	return b.SchedulerPeerID
}

// GetPeerSession returns the peer ID of the scheduler that owns the session.
func (b *ScheduledPurchase) GetPeerSession() string {
	return b.SchedulerPeerID
}

// GetDestPeer returns the destination peer the booking is routed to.
func (b *ScheduledBooking) GetDestPeer() string {
	return b.DestPeerID
}

// GetDestPeer returns the destination peer the purchase is routed to.
func (b *ScheduledPurchase) GetDestPeer() string {
	return b.DestPeerID
}

// GetKey identifies the reserved slot as resource/instance/datatype.
func (b *ScheduledBooking) GetKey() string {
	return b.ResourceID + "/" + b.InstanceID + "/" + tools.BOOKING.String()
}

// GetKey identifies the reserved slot as resource/instance/datatype.
func (b *ScheduledPurchase) GetKey() string {
	return b.ResourceID + "/" + b.InstanceID + "/" + tools.PURCHASE_RESOURCE.String()
}

// SetIsDraft marks the booking as draft (true) or confirmed (false).
func (b *ScheduledBooking) SetIsDraft(ok bool) {
	b.IsDraft = ok
}

// SetIsDraft marks the purchase as draft (true) or confirmed (false).
func (b *ScheduledPurchase) SetIsDraft(ok bool) {
	b.IsDraft = ok
}

// SetSchedulerPeerID records which scheduler peer created this booking.
func (b *ScheduledBooking) SetSchedulerPeerID(peerID string) {
	b.SchedulerPeerID = peerID
}

// SetSchedulerPeerID records which scheduler peer created this purchase.
func (b *ScheduledPurchase) SetSchedulerPeerID(peerID string) {
	b.SchedulerPeerID = peerID
}

// SetExecutionsID attaches the booking to a scheduling session.
func (b *ScheduledBooking) SetExecutionsID(ei string) {
	b.ExecutionsID = ei
}

// SetExecutionsID attaches the purchase to a scheduling session.
func (b *ScheduledPurchase) SetExecutionsID(ei string) {
	b.ExecutionsID = ei
}
diff --git a/infrastructure/session.go b/infrastructure/session.go
new file mode 100644
index 0000000..f1fb138
--- /dev/null
+++ b/infrastructure/session.go
@@ -0,0 +1,345 @@
+package infrastructure
+
+import (
+ "encoding/json"
+ "fmt"
+ "oc-scheduler/infrastructure/scheduling"
+ "time"
+
+ oclib "cloud.o-forge.io/core/oc-lib"
+ "cloud.o-forge.io/core/oc-lib/dbs"
+ "cloud.o-forge.io/core/oc-lib/models/booking"
+ "cloud.o-forge.io/core/oc-lib/models/order"
+ "cloud.o-forge.io/core/oc-lib/models/peer"
+ "cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
+ "cloud.o-forge.io/core/oc-lib/models/utils"
+ "cloud.o-forge.io/core/oc-lib/models/workflow_execution"
+ "cloud.o-forge.io/core/oc-lib/tools"
+)
+
+// removeResourcePayload is sent via NATS REMOVE_RESOURCE so the receiver can
+// verify the delete order comes from the original scheduler session.
+type removeResourcePayload struct {
+ ID string `json:"id"`
+ SchedulerPeerID string `json:"scheduler_peer_id"`
+ ExecutionsID string `json:"executions_id"`
+}
+
+// ---------------------------------------------------------------------------
+// DB helpers — objects are found via executions_id
+// ---------------------------------------------------------------------------
+
+func sessionIDFilter(field, id string) *dbs.Filters {
+ return &dbs.Filters{
+ And: map[string][]dbs.Filter{
+ field: {{Operator: dbs.EQUAL.String(), Value: id}},
+ },
+ }
+}
+
+func loadSession(executionsID string, dt tools.DataType) []scheduling.SchedulerObject {
+ results := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).Search(
+ sessionIDFilter("executions_id", executionsID), "", true)
+ out := make([]scheduling.SchedulerObject, 0, len(results.Data))
+ for _, obj := range results.Data {
+ out = append(out, scheduling.ToSchedulerObject(dt, obj))
+ }
+ return out
+}
+
+func loadSessionExecs(executionsID string) []*workflow_execution.WorkflowExecution {
+ adminReq := &tools.APIRequest{Admin: true}
+ results, _, _ := workflow_execution.NewAccessor(adminReq).Search(
+ sessionIDFilter("executions_id", executionsID), "", true)
+ out := make([]*workflow_execution.WorkflowExecution, 0, len(results))
+ for _, obj := range results {
+ if exec, ok := obj.(*workflow_execution.WorkflowExecution); ok {
+ out = append(out, exec)
+ }
+ }
+ return out
+}
+
+func loadSessionOrder(executionsID string) *order.Order {
+ adminReq := &tools.APIRequest{Admin: true}
+ results, _, _ := order.NewAccessor(adminReq).Search(
+ sessionIDFilter("executions_id", executionsID), "", true)
+ for _, obj := range results {
+ if o, ok := obj.(*order.Order); ok {
+ return o
+ }
+ }
+ return nil
+}
+
+// ---------------------------------------------------------------------------
+// Session upsert
+// ---------------------------------------------------------------------------
+
+// UpsertSessionDrafts creates or updates draft bookings/purchases/executions for a
+// Check session. Existing objects are found via the DB (executions_id).
+// Called on first successful check and on user date changes.
+//
+// - bookings/purchases: upserted by (resourceID, instanceID); stale ones deleted
+// - executions: replaced on every call (dates may have changed)
+// - order: created once, updated on subsequent calls
+func (ws *WorkflowSchedule) UpsertSessionDrafts(wfID, executionsID string, selfID *peer.Peer, request *tools.APIRequest) {
+ _, _, execs, purchases, bookings, err := ws.GetBuyAndBook(wfID, request)
+ if err != nil {
+ return
+ }
+
+ adminReq := &tools.APIRequest{Admin: true}
+
+ // --- bookings ---
+ existing := map[string]scheduling.SchedulerObject{}
+ seen := map[string]bool{}
+ for dt, datas := range map[tools.DataType][]scheduling.SchedulerObject{
+ tools.BOOKING: bookings, tools.PURCHASE_RESOURCE: purchases,
+ } {
+ for _, bk := range loadSession(executionsID, dt) {
+ existing[bk.GetKey()] = bk
+ }
+ upsertSessionDrafts(dt, datas, existing, seen, selfID, executionsID, request)
+ for key, prev := range existing {
+ if !seen[key] {
+ deleteScheduling(dt, prev, selfID, request)
+ }
+ }
+
+ }
+ // --- executions: replace on every call (dates may have changed) ---
+ for _, old := range loadSessionExecs(executionsID) {
+ UnregisterExecLock(old.GetID())
+ workflow_execution.NewAccessor(adminReq).DeleteOne(old.GetID())
+ }
+ for _, exec := range execs {
+ exec.ExecutionsID = executionsID
+ exec.IsDraft = true
+ ex, _, err := utils.GenericStoreOne(exec, workflow_execution.NewAccessor(adminReq))
+ if err == nil {
+ RegisterExecLock(ex.GetID())
+ go WatchExecDeadline(ex.GetID(), exec.ExecDate, selfID, request)
+ }
+ }
+
+ // --- order: create once, update on subsequent calls ---
+ if existing := loadSessionOrder(executionsID); existing == nil {
+ ws.GenerateOrder(purchases, bookings, executionsID, request)
+ } else {
+ for _, purch := range purchases {
+ existing.Purchases = append(
+ existing.Purchases, scheduling.FromSchedulerObject(tools.PURCHASE_RESOURCE, purch).(*purchase_resource.PurchaseResource))
+ }
+ for _, b := range bookings {
+ existing.Bookings = append(
+ existing.Bookings, scheduling.FromSchedulerObject(tools.BOOKING, b).(*booking.Booking))
+ }
+ utils.GenericRawUpdateOne(existing, existing.GetID(), order.NewAccessor(adminReq))
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Session lifecycle
+// ---------------------------------------------------------------------------
+
+func upsertSessionDrafts(dt tools.DataType, datas []scheduling.SchedulerObject, existing map[string]scheduling.SchedulerObject,
+ seen map[string]bool, selfID *peer.Peer,
+ executionsID string, request *tools.APIRequest) {
+ fmt.Println("UpsertSessionDrafts", len(datas), len(existing))
+ for _, bk := range datas {
+ bk.SetSchedulerPeerID(selfID.PeerID)
+ bk.SetExecutionsID(executionsID)
+ seen[bk.GetKey()] = true
+ if prev, ok := existing[bk.GetKey()]; ok {
+ bk.SetID(prev.GetID())
+ bk.SetIsDraft(false)
+ // Convert to concrete type (Booking/PurchaseResource) so that
+ // GenericRawUpdateOne serializes the real struct, not the wrapper.
+ propagateWriteResource(
+ scheduling.FromSchedulerDBObject(dt, bk), bk.GetDestPeer(), dt, selfID, request)
+ } else {
+ errCh := make(chan error, 1)
+ propagateResource(scheduling.FromSchedulerDBObject(dt, bk), bk.GetDestPeer(), dt, selfID, request, errCh)
+ <-errCh
+ }
+ }
+}
+
+// CleanupSession deletes all draft bookings/purchases/executions/order for a
+// session (called when the WebSocket closes without a confirm).
+func CleanupSession(self *peer.Peer, executionsID string, selfID *peer.Peer, request *tools.APIRequest) {
+ adminReq := &tools.APIRequest{Admin: true}
+ for _, exec := range loadSessionExecs(executionsID) {
+ UnscheduleExecution(exec.GetID(), selfID, request)
+ workflow_execution.NewAccessor(adminReq).DeleteOne(exec.GetID())
+ }
+ if o := loadSessionOrder(executionsID); o != nil {
+ order.NewAccessor(adminReq).DeleteOne(o.GetID())
+ }
+}
+
+// ConfirmSession flips all session drafts to IsDraft=false and propagates them.
+// The considers mechanism then transitions executions to IsDraft=false once
+// all remote peers acknowledge.
+func ConfirmSession(executionsID string, selfID *peer.Peer, request *tools.APIRequest) error {
+ for _, dt := range []tools.DataType{tools.BOOKING, tools.PURCHASE_RESOURCE} {
+ for _, bk := range loadSession(executionsID, dt) {
+ bk.SetIsDraft(false)
+ propagateWriteResource(
+ scheduling.FromSchedulerDBObject(dt, bk), bk.GetDestPeer(), dt, selfID, request)
+ }
+ }
+ return nil
+}
+
+// confirmSessionOrder sets the order IsDraft=false once all considers are received.
+func confirmSessionOrder(executionsID string, adminReq *tools.APIRequest) {
+ if o := loadSessionOrder(executionsID); o != nil {
+ o.IsDraft = false
+ utils.GenericRawUpdateOne(o, o.GetID(), order.NewAccessor(adminReq))
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Propagation
+// ---------------------------------------------------------------------------
+
+// propagateWriteResource routes a booking/purchase write to its destination:
+// - local peer → DB upsert; emits considers on confirm (IsDraft=false)
+// - remote peer → NATS CREATE_RESOURCE (receiver upserts)
+func propagateWriteResource(obj utils.DBObject, destPeerID string, dt tools.DataType, selfID *peer.Peer, request *tools.APIRequest) {
+ if destPeerID == selfID.GetID() {
+ if _, _, err := utils.GenericRawUpdateOne(obj, obj.GetID(), obj.GetAccessor(request)); err != nil {
+ fmt.Printf("propagateWriteResource: local update failed for %s %s: %v\n", dt, obj.GetID(), err)
+ return
+ }
+ if dt == tools.BOOKING {
+ go refreshSelfPlanner(selfID.PeerID, request)
+ }
+ fmt.Println("IS DRAFTED", obj.IsDrafted())
+ if !obj.IsDrafted() {
+ if payload, err := json.Marshal(&executionConsidersPayload{
+ ID: obj.GetID(),
+ }); err == nil {
+ go updateExecutionState(payload, dt)
+ }
+ }
+ return
+ }
+ payload, err := json.Marshal(obj)
+ if err != nil {
+ return
+ }
+ tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
+ FromApp: "oc-scheduler",
+ Datatype: dt,
+ Method: int(tools.CREATE_RESOURCE),
+ Payload: payload,
+ })
+}
+
+// deleteBooking deletes a booking from its destination peer (local DB or NATS).
+func deleteScheduling(dt tools.DataType, bk scheduling.SchedulerObject, selfID *peer.Peer, request *tools.APIRequest) {
+ if bk.GetDestPeer() == selfID.GetID() {
+ oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).DeleteOne(bk.GetID())
+ go refreshSelfPlanner(selfID.PeerID, request)
+ return
+ }
+ emitNATSRemove(bk.GetID(), bk.GetPeerSession(), bk.GetExecutionsId(), dt)
+}
+
+// emitNATSRemove sends a REMOVE_RESOURCE event to the remote peer carrying
+// auth fields so the receiver can verify the delete is legitimate.
+func emitNATSRemove(id, schedulerPeerID, executionsID string, dt tools.DataType) {
+ payload, _ := json.Marshal(removeResourcePayload{
+ ID: id,
+ SchedulerPeerID: schedulerPeerID,
+ ExecutionsID: executionsID,
+ })
+ tools.NewNATSCaller().SetNATSPub(tools.REMOVE_RESOURCE, tools.NATSResponse{
+ FromApp: "oc-scheduler",
+ Datatype: dt,
+ Method: int(tools.REMOVE_RESOURCE),
+ Payload: payload,
+ })
+}
+
+// ---------------------------------------------------------------------------
+// Deadline watchers
+// ---------------------------------------------------------------------------
+
+// WatchExecDeadline purges all unconfirmed bookings/purchases for an execution
+// one minute before its scheduled start, to avoid stale drafts blocking resources.
+// If the deadline has already passed (e.g. after a process restart), it fires immediately.
+func WatchExecDeadline(executionID string, execDate time.Time, selfID *peer.Peer, request *tools.APIRequest) {
+ fmt.Println("WatchExecDeadline")
+ delay := time.Until(execDate.UTC().Add(-1 * time.Minute))
+ if delay <= 0 {
+ go purgeUnconfirmedExecution(executionID, selfID, request)
+ return
+ }
+ time.AfterFunc(delay, func() { purgeUnconfirmedExecution(executionID, selfID, request) })
+}
+
+func purgeUnconfirmedExecution(executionID string, selfID *peer.Peer, request *tools.APIRequest) {
+ acc := workflow_execution.NewAccessor(&tools.APIRequest{Admin: true})
+ UnscheduleExecution(executionID, selfID, request)
+ _, _, err := acc.DeleteOne(executionID)
+ fmt.Printf("purgeUnconfirmedExecution: cleaned up resources for execution %s\n", err)
+}
+
+// RecoverDraftExecutions is called at startup to restore deadline watchers for
+// draft executions that survived a process restart. Executions already past
+// their deadline are purged immediately.
+func RecoverDraftExecutions() {
+ adminReq := &tools.APIRequest{Admin: true}
+ var selfID *peer.Peer
+ for selfID == nil {
+ selfID, _ = oclib.GetMySelf()
+ if selfID == nil {
+ time.Sleep(5 * time.Second)
+ }
+ }
+ results, _, _ := workflow_execution.NewAccessor(adminReq).Search(nil, "*", true)
+ for _, obj := range results {
+ exec, ok := obj.(*workflow_execution.WorkflowExecution)
+ if !ok {
+ continue
+ }
+ RegisterExecLock(exec.GetID())
+ go WatchExecDeadline(exec.GetID(), exec.ExecDate, selfID, adminReq)
+ }
+ fmt.Printf("RecoverDraftExecutions: recovered %d draft executions\n", len(results))
+}
+
+// ---------------------------------------------------------------------------
+// Unschedule
+// ---------------------------------------------------------------------------
+
+// UnscheduleExecution deletes all bookings for an execution (via PeerBookByGraph)
+// then deletes the execution itself.
+func UnscheduleExecution(executionID string, selfID *peer.Peer, request *tools.APIRequest) error {
+ fmt.Println("UnscheduleExecution")
+ adminReq := &tools.APIRequest{Admin: true}
+ res, _, err := workflow_execution.NewAccessor(adminReq).LoadOne(executionID)
+ if err != nil || res == nil {
+ return fmt.Errorf("execution %s not found: %w", executionID, err)
+ }
+ exec := res.(*workflow_execution.WorkflowExecution)
+ for _, byResource := range exec.PeerBookByGraph {
+ for _, bookingIDs := range byResource {
+ for _, bkID := range bookingIDs {
+ bkRes, _, loadErr := booking.NewAccessor(adminReq).LoadOne(bkID)
+ fmt.Println("UnscheduleExecution", bkID, loadErr)
+ if loadErr != nil || bkRes == nil {
+ continue
+ }
+ deleteScheduling(tools.BOOKING, scheduling.ToSchedulerObject(tools.BOOKING, bkRes), selfID, request)
+ }
+ }
+ }
+ workflow_execution.NewAccessor(adminReq).DeleteOne(executionID)
+ UnregisterExecLock(executionID)
+ return nil
+}
diff --git a/main.go b/main.go
index 047806a..0080f5c 100644
--- a/main.go
+++ b/main.go
@@ -36,5 +36,6 @@ func main() {
go infrastructure.ListenNATS()
go infrastructure.InitSelfPlanner()
go infrastructure.ListenConfirm()
+ go infrastructure.RecoverDraftExecutions()
beego.Run()
}
diff --git a/oc-scheduler b/oc-scheduler
index f133e3e..a8f27c7 100755
Binary files a/oc-scheduler and b/oc-scheduler differ
diff --git a/routers/commentsRouter.go b/routers/commentsRouter.go
index df46195..ed2cfbe 100644
--- a/routers/commentsRouter.go
+++ b/routers/commentsRouter.go
@@ -133,15 +133,6 @@ func init() {
Filters: nil,
Params: nil})
- beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
- beego.ControllerComments{
- Method: "Schedule",
- Router: `/:id`,
- AllowHTTPMethods: []string{"post"},
- MethodParams: param.Make(),
- Filters: nil,
- Params: nil})
-
beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
beego.ControllerComments{
Method: "UnSchedule",
@@ -151,15 +142,6 @@ func init() {
Filters: nil,
Params: nil})
- beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
- beego.ControllerComments{
- Method: "CheckStream",
- Router: `/:id/check`,
- AllowHTTPMethods: []string{"get"},
- MethodParams: param.Make(),
- Filters: nil,
- Params: nil})
-
beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
beego.ControllerComments{
Method: "SearchScheduledDraftOrder",
diff --git a/routers/router.go b/routers/router.go
index c87dfb8..b6f3982 100644
--- a/routers/router.go
+++ b/routers/router.go
@@ -8,6 +8,7 @@
package routers
import (
+ "net/http"
"oc-scheduler/controllers"
beego "github.com/beego/beego/v2/server/web"
@@ -46,4 +47,8 @@ func init() {
)
beego.AddNamespace(ns)
+
+ // WebSocket route registered outside the Beego pipeline to avoid the
+ // spurious WriteHeader that prevents the 101 Switching Protocols upgrade.
+ beego.Handler("/oc/:id/check", http.HandlerFunc(controllers.CheckStreamHandler))
}
diff --git a/swagger/swagger.json b/swagger/swagger.json
index 2bf0b47..f750c5b 100644
--- a/swagger/swagger.json
+++ b/swagger/swagger.json
@@ -260,6 +260,81 @@
}
}
},
+ "/verification/": {
+ "get": {
+ "tags": [
+ "verification"
+ ],
+ "description": "find verification by id\n\u003cbr\u003e",
+ "operationId": "ExecutionVerificationController.GetAll",
+ "parameters": [
+ {
+ "in": "query",
+ "name": "is_draft",
+ "description": "draft wished",
+ "type": "string"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "{booking} models.booking"
+ }
+ }
+ }
+ },
+ "/verification/{id}": {
+ "get": {
+ "tags": [
+ "verification"
+ ],
+ "description": "find verification by id\n\u003cbr\u003e",
+ "operationId": "ExecutionVerificationController.Get",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "id",
+ "description": "the id you want to get",
+ "required": true,
+ "type": "string"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "{booking} models.booking"
+ }
+ }
+ },
+ "put": {
+ "tags": [
+ "verification"
+ ],
+ "description": "create computes\n\u003cbr\u003e",
+ "operationId": "ExecutionVerificationController.Update",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "id",
+ "description": "the compute id you want to get",
+ "required": true,
+ "type": "string"
+ },
+ {
+ "in": "body",
+ "name": "body",
+ "description": "The compute content",
+ "required": true,
+ "schema": {
+ "$ref": "#/definitions/models.compute"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "{compute} models.compute"
+ }
+ }
+ }
+ },
"/version/": {
"get": {
"tags": [
@@ -289,98 +364,27 @@
}
},
"/{id}": {
- "post": {
- "tags": [
- "oc-scheduler/controllersWorkflowSchedulerController"
- ],
- "description": "schedule workflow\n\u003cbr\u003e",
- "operationId": "WorkflowSchedulerController.Schedule",
- "parameters": [
- {
- "in": "path",
- "name": "id",
- "description": "id execution",
- "required": true,
- "type": "string"
- },
- {
- "in": "body",
- "name": "body",
- "description": "The compute content",
- "required": true,
- "schema": {
- "$ref": "#/definitions/models.compute"
- }
- }
- ],
- "responses": {
- "200": {
- "description": "{workspace} models.workspace"
- }
- }
- },
"delete": {
"tags": [
"oc-scheduler/controllersWorkflowSchedulerController"
],
- "description": "schedule workflow\n\u003cbr\u003e",
+ "description": "unschedule a workflow execution: deletes its bookings on all peers then deletes the execution.\n\u003cbr\u003e",
"operationId": "WorkflowSchedulerController.UnSchedule",
"parameters": [
{
"in": "path",
"name": "id",
- "description": "id execution",
+ "description": "execution id",
"required": true,
"type": "string"
- },
- {
- "in": "body",
- "name": "body",
- "description": "The compute content",
- "required": true,
- "schema": {
- "$ref": "#/definitions/models.compute"
- }
}
],
"responses": {
"200": {
- "description": "{workspace} models.workspace"
- }
- }
- }
- },
- "/{id}/check": {
- "get": {
- "tags": [
- "oc-scheduler/controllersWorkflowSchedulerController"
- ],
- "description": "WebSocket stream of slot availability for a workflow.\n\u003cbr\u003e",
- "operationId": "WorkflowSchedulerController.CheckStream",
- "parameters": [
- {
- "in": "path",
- "name": "id",
- "description": "workflow id",
- "required": true,
- "type": "string"
- },
- {
- "in": "query",
- "name": "as_possible",
- "description": "find nearest free slot from now",
- "type": "boolean"
- },
- {
- "in": "query",
- "name": "preemption",
- "description": "validate anyway, raise warnings",
- "type": "boolean"
- }
- ],
- "responses": {
- "101": {
- "description": ""
+ "description": "",
+ "schema": {
+ "$ref": "#/definitions/map[string]interface{}"
+ }
}
}
}
@@ -410,6 +414,10 @@
}
},
"definitions": {
+ "map[string]interface{}": {
+ "title": "map[string]interface{}",
+ "type": "object"
+ },
"models.compute": {
"title": "compute",
"type": "object"
@@ -428,6 +436,10 @@
"name": "booking",
"description": "Operations about workspace\n"
},
+ {
+ "name": "verification",
+ "description": "Operations about workspace\n"
+ },
{
"name": "execution",
"description": "Operations about workflow\n"
diff --git a/swagger/swagger.yml b/swagger/swagger.yml
index 726e6c1..2f36b3a 100644
--- a/swagger/swagger.yml
+++ b/swagger/swagger.yml
@@ -13,75 +13,24 @@ info:
basePath: /oc/
paths:
/{id}:
- post:
- tags:
- - oc-scheduler/controllersWorkflowSchedulerController
- description: |-
- schedule workflow
-
- operationId: WorkflowSchedulerController.Schedule
- parameters:
- - in: path
- name: id
- description: id execution
- required: true
- type: string
- - in: body
- name: body
- description: The compute content
- required: true
- schema:
- $ref: '#/definitions/models.compute'
- responses:
- "200":
- description: '{workspace} models.workspace'
delete:
tags:
- oc-scheduler/controllersWorkflowSchedulerController
description: |-
- schedule workflow
+ unschedule a workflow execution: deletes its bookings on all peers then deletes the execution.
operationId: WorkflowSchedulerController.UnSchedule
parameters:
- in: path
name: id
- description: id execution
+ description: execution id
required: true
type: string
- - in: body
- name: body
- description: The compute content
- required: true
- schema:
- $ref: '#/definitions/models.compute'
responses:
"200":
- description: '{workspace} models.workspace'
- /{id}/check:
- get:
- tags:
- - oc-scheduler/controllersWorkflowSchedulerController
- description: |-
- WebSocket stream of slot availability for a workflow.
-
- operationId: WorkflowSchedulerController.CheckStream
- parameters:
- - in: path
- name: id
- description: workflow id
- required: true
- type: string
- - in: query
- name: as_possible
- description: find nearest free slot from now
- type: boolean
- - in: query
- name: preemption
- description: validate anyway, raise warnings
- type: boolean
- responses:
- "101":
description: ""
+ schema:
+ $ref: '#/definitions/map[string]interface{}'
/{id}/order:
get:
tags:
@@ -277,6 +226,61 @@ paths:
responses:
"200":
description: '{workspace} models.workspace'
+ /verification/:
+ get:
+ tags:
+ - verification
+ description: |-
+ find verification by id
+
+ operationId: ExecutionVerificationController.GetAll
+ parameters:
+ - in: query
+ name: is_draft
+ description: draft wished
+ type: string
+ responses:
+ "200":
+ description: '{booking} models.booking'
+ /verification/{id}:
+ get:
+ tags:
+ - verification
+ description: |-
+ find verification by id
+
+ operationId: ExecutionVerificationController.Get
+ parameters:
+ - in: path
+ name: id
+ description: the id you want to get
+ required: true
+ type: string
+ responses:
+ "200":
+ description: '{booking} models.booking'
+ put:
+ tags:
+ - verification
+ description: |-
+ create computes
+
+ operationId: ExecutionVerificationController.Update
+ parameters:
+ - in: path
+ name: id
+ description: the compute id you want to get
+ required: true
+ type: string
+ - in: body
+ name: body
+ description: The compute content
+ required: true
+ schema:
+ $ref: '#/definitions/models.compute'
+ responses:
+ "200":
+ description: '{compute} models.compute'
/version/:
get:
tags:
@@ -300,6 +304,9 @@ paths:
"200":
description: ""
definitions:
+ map[string]interface{}:
+ title: map[string]interface{}
+ type: object
models.compute:
title: compute
type: object
@@ -313,6 +320,9 @@ tags:
- name: booking
description: |
Operations about workspace
+- name: verification
+ description: |
+ Operations about workspace
- name: execution
description: |
Operations about workflow
diff --git a/ws.go b/ws.go
index 77e530f..a265880 100644
--- a/ws.go
+++ b/ws.go
@@ -23,7 +23,7 @@ func main() {
// ws://localhost:8090/oc//check
// ws://localhost:8090/oc//check?as_possible=true
// ws://localhost:8090/oc//check?as_possible=true&preemption=true
- url := "ws://localhost:8090/oc/WORKFLOW_ID/check?as_possible=true"
+ url := "ws://localhost:8090/oc/58314c99-c595-4ca2-8b5e-822a6774efed/check?as_possible=true"
token := ""
// Body JSON envoyé comme premier message WebSocket (WorkflowSchedule).
// Seuls start + duration_s sont requis si as_possible=true.
@@ -80,6 +80,14 @@ func main() {
}
}()
+ // Après 5 secondes, simule un changement de date côté front (now + 3 min).
+ dateChangeTick := time.NewTimer(10 * time.Second)
+ defer dateChangeTick.Stop()
+
+ // Après 15 secondes, simule la confirmation du scheduling par le client.
+ confirmTick := time.NewTimer(15 * time.Second)
+ defer confirmTick.Stop()
+
idleTimer := time.NewTimer(time.Duration(*timeout) * time.Second)
defer idleTimer.Stop()
@@ -94,6 +102,20 @@ func main() {
case <-idleTimer.C:
fmt.Printf("Timeout (%ds) — aucun message reçu, fermeture.\n", *timeout)
return
+ case <-dateChangeTick.C:
+ newStart := time.Now().UTC().Add(3 * time.Minute)
+ update := `{"start":"` + newStart.Format(time.RFC3339) + `","duration_s":3600}`
+ fmt.Printf("\n[sim] Envoi mise à jour de date → %s\n\n", update)
+ if err := websocket.Message.Send(ws, update); err != nil {
+ fmt.Printf("Erreur envoi mise à jour : %v\n", err)
+ return
+ }
+ case <-confirmTick.C:
+ fmt.Println("\n[sim] Envoi confirmation du scheduling → {\"confirm\":true}\n")
+ if err := websocket.Message.Send(ws, `{"confirm":true}`); err != nil {
+ fmt.Printf("Erreur envoi confirmation : %v\n", err)
+ return
+ }
case raw := <-msgs:
idleTimer.Reset(time.Duration(*timeout) * time.Second)
var data any