Scheduling Node

This commit is contained in:
mr
2026-03-17 11:58:27 +01:00
parent b9df0b2731
commit 7fbc077cb1
20 changed files with 2281 additions and 1504 deletions

View File

@@ -58,7 +58,6 @@ func (o *LokiController) GetLogs() {
path += "?query={" + strings.Join(query, ", ") + "}&start=" + start + "&end=" + end
resp, err := http.Get(config.GetConfig().LokiUrl + path) // CALL
fmt.Println(resp, path)
if err != nil {
o.Ctx.ResponseWriter.WriteHeader(422)
o.Data["json"] = map[string]string{"error": err.Error()}

265
controllers/sheduler.go Normal file
View File

@@ -0,0 +1,265 @@
package controllers
import (
"fmt"
"net/http"
"oc-scheduler/infrastructure"
"strings"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/dbs"
"cloud.o-forge.io/core/oc-lib/tools"
beego "github.com/beego/beego/v2/server/web"
"github.com/google/uuid"
gorillaws "github.com/gorilla/websocket"
)
// orderCollection is the oc-lib data type identifier of the ORDER collection;
// it is passed to oclib.NewRequestAdmin when searching orders.
var orderCollection = oclib.LibDataEnum(oclib.ORDER)
// logger is the package-level logger obtained from oc-lib.
var logger = oclib.GetLogger()
// WorkflowSchedulerController groups the HTTP operations about workflow
// scheduling. It embeds beego.Controller and declares no fields of its own.
type WorkflowSchedulerController struct {
beego.Controller
}
// wsUpgrader upgrades incoming HTTP requests to WebSocket connections.
// NOTE(review): CheckOrigin returns true for every request, so the Origin
// header is never validated here — confirm that origin filtering is enforced
// elsewhere before exposing this endpoint to browsers.
var wsUpgrader = gorillaws.Upgrader{
CheckOrigin: func(r *http.Request) bool { return true },
}
// CheckStreamHandler is the WebSocket handler for slot availability checking.
// It is invoked via the CheckStream controller method.
//
// The workflow ID is read from the URL path ("/oc/<id>/check").
// Query params: as_possible=true, preemption=true
//
// Flow, as implemented below:
//   - the peers involved in the workflow and the local peer are resolved
//     before the upgrade, so failures are answered with a plain HTTP error;
//   - the first client message must be a WorkflowSchedule; it triggers an
//     initial check which, when the slot is available, also upserts the
//     session drafts;
//   - later client messages either confirm the session (Confirm=true) or
//     replace the schedule parameters and trigger a new check;
//   - planner and workflow notifications trigger re-checks;
//   - every check result sent to the client carries the session's
//     SchedulingID.
//
// On exit the connection is closed, subscriptions and refresh ownership are
// released, and the session is cleaned up unless it was confirmed.
func CheckStreamHandler(w http.ResponseWriter, r *http.Request) {
	wfID := strings.TrimSuffix(
		strings.TrimPrefix(r.URL.Path, "/oc/"),
		"/check",
	)
	q := r.URL.Query()
	asap := q.Get("as_possible") == "true"
	preemption := q.Get("preemption") == "true"

	user, peerID, groups := oclib.ExtractTokenInfo(*r)
	req := &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Caller:   nil,
		Admin:    true,
	}

	// Resolve everything that can fail before upgrading: once the connection
	// is upgraded a plain HTTP error can no longer be returned.
	watchedPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req)
	fmt.Println("Here my watched peers involved in workflow", watchedPeers)
	if err != nil {
		http.Error(w, `{"code":404,"error":"`+err.Error()+`"}`, http.StatusNotFound)
		return
	}
	selfID, err := oclib.GetMySelf()
	if err != nil || selfID == nil {
		// err may be nil here (selfID == nil without error), so it must not
		// be dereferenced; the message is a fixed string.
		logger.Err(err).Msg("check stream: unable to resolve the local peer")
		http.Error(w, `{"code":500,"error":"unable to resolve the local peer"}`, http.StatusInternalServerError)
		return
	}
	selfPeerID := selfID.PeerID

	conn, err := wsUpgrader.Upgrade(w, r, nil)
	if err != nil {
		// The upgrader has already answered the request.
		return
	}

	// The first message carries the schedule parameters.
	var ws infrastructure.WorkflowSchedule
	if err := conn.ReadJSON(&ws); err != nil {
		conn.Close()
		return
	}

	plannerCh, plannerUnsub := infrastructure.SubscribePlannerUpdates(watchedPeers)
	wfCh, wfUnsub := infrastructure.SubscribeWorkflowUpdates(wfID)

	// executionsID identifies this session: it is used for refresh ownership,
	// for the session drafts and as SchedulingID in every result.
	executionsID := uuid.New().String()
	ownedPeers := infrastructure.RequestPlannerRefresh(watchedPeers, executionsID)

	// scheduled=true once bookings/purchases/exec have been created for this session.
	scheduled := false
	confirmed := false

	// Registered right after the resources are acquired so that every return
	// path below releases them. The closure reads plannerUnsub and ownedPeers
	// at exit time, so it sees the values reassigned in the wfCh case.
	defer func() {
		conn.Close()
		plannerUnsub()
		wfUnsub()
		infrastructure.ReleaseRefreshOwnership(ownedPeers, executionsID)
		if !confirmed {
			infrastructure.CleanupSession(selfID, executionsID, selfID, req)
		}
	}()

	// pushCheck runs an availability check and sends the result to the client.
	// If reschedule=true and the slot is available, it also creates/updates
	// bookings, purchases and the execution draft for this session.
	pushCheck := func(reschedule bool) error {
		result, checkErr := ws.Check(wfID, asap, preemption, req)
		if checkErr != nil {
			return checkErr
		}
		if result.Available && reschedule {
			// Sync the resolved start/end back to ws so that UpsertSessionDrafts
			// creates bookings/purchases with the actual scheduled dates (not the
			// raw client value which may be zero or pre-asapBuffer).
			ws.Start = result.Start
			if result.End != nil {
				ws.End = result.End
			}
			ws.UpsertSessionDrafts(wfID, executionsID, selfID, req)
			scheduled = true
		}
		result.SchedulingID = executionsID
		return conn.WriteJSON(result)
	}

	// Initial check + schedule.
	if err := pushCheck(true); err != nil {
		return
	}

	// Reader goroutine: forwards client messages to updateCh and closes
	// closeCh when the connection can no longer be read. It ends when
	// ReadJSON fails, which also happens once the deferred conn.Close runs.
	updateCh := make(chan infrastructure.WorkflowSchedule, 1)
	closeCh := make(chan struct{})
	go func() {
		defer close(closeCh)
		for {
			var updated infrastructure.WorkflowSchedule
			if err := conn.ReadJSON(&updated); err != nil {
				return
			}
			// Keep only the most recent update. The drain must be
			// non-blocking: the consumer may empty the buffer between the
			// failed send and the drain, and a blocking receive would then
			// hang this goroutine forever. This goroutine is the only
			// sender, so the final send cannot block.
			select {
			case updateCh <- updated:
			default:
				select {
				case <-updateCh:
				default:
					// The consumer took the pending update in the meantime.
				}
				updateCh <- updated
			}
		}
	}()

	for {
		select {
		case updated := <-updateCh:
			if updated.Confirm {
				// Confirm: flip bookings/purchases to IsDraft=false, then let
				// the considers mechanism transition exec to IsDraft=false.
				ws.UUID = executionsID
				_, _, _, schedErr := ws.Schedules(wfID, req)
				if schedErr != nil {
					// Best effort: the stream is closing anyway.
					_ = conn.WriteJSON(map[string]interface{}{
						"error": schedErr.Error(),
					})
					return
				}
				confirmed = true
				return
			}
			// NOTE(review): the session is cleaned up on every non-confirm
			// update, but drafts are only re-created below when
			// datesChanged || !scheduled — confirm this is intended when
			// scheduled is already true and the dates did not change.
			infrastructure.CleanupSession(selfID, executionsID, selfID, req)
			// Detect whether the user changed dates or instances.
			datesChanged := !updated.Start.Equal(ws.Start) ||
				updated.DurationS != ws.DurationS ||
				(updated.End == nil) != (ws.End == nil) ||
				(updated.End != nil && ws.End != nil && !updated.End.Equal(*ws.End))
			ws = updated
			// Reschedule when dates changed or we haven't scheduled yet.
			if err := pushCheck(datesChanged || !scheduled); err != nil {
				return
			}
		case remotePeerID := <-plannerCh:
			if remotePeerID == selfPeerID {
				// Our own planner updated (caused by our local booking store).
				// Just resend the current availability result without rescheduling
				// to avoid an infinite loop.
				result, checkErr := ws.Check(wfID, asap, preemption, req)
				if checkErr == nil {
					result.SchedulingID = executionsID
					_ = conn.WriteJSON(result)
				}
				continue
			}
			// A remote peer's planner changed. Re-check; if our slot is now
			// taken and we were already scheduled, reschedule at the new slot.
			result, checkErr := ws.Check(wfID, asap, preemption, req)
			if checkErr != nil {
				return
			}
			if !result.Available && scheduled {
				// Move to the next free slot and reschedule.
				if result.NextSlot != nil {
					ws.Start = *result.NextSlot
				}
				if err := pushCheck(true); err != nil {
					return
				}
			} else {
				result.SchedulingID = executionsID
				_ = conn.WriteJSON(result)
			}
		case <-wfCh:
			// The workflow changed: refresh the peer list and re-subscribe.
			if newPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req); err == nil {
				plannerUnsub()
				watchedPeers = newPeers
				plannerCh, plannerUnsub = infrastructure.SubscribePlannerUpdates(newPeers)
				newOwned := infrastructure.RequestPlannerRefresh(newPeers, executionsID)
				ownedPeers = append(ownedPeers, newOwned...)
			}
			if err := pushCheck(false); err != nil {
				return
			}
		case <-closeCh:
			return
		}
	}
}
// UnSchedule unschedules the execution identified by the ":id" path
// parameter by delegating to infrastructure.UnscheduleExecution. The JSON
// answer carries "code" (200, 404 or 500) and "error" (empty on success).
//
// @Title UnSchedule
// @Description unschedule a workflow execution: deletes its bookings on all peers then deletes the execution.
// @Param id path string true "execution id"
// @Success 200 {object} map[string]interface{}
// @router /:id [delete]
func (o *WorkflowSchedulerController) UnSchedule() {
	user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
	executionID := o.Ctx.Input.Param(":id")
	req := &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Admin:    true,
	}

	// Do not unschedule without a resolved local peer: the lookup error used
	// to be discarded and a possibly nil value passed on.
	selfID, err := oclib.GetMySelf()
	if err != nil || selfID == nil {
		msg := "unable to resolve the local peer"
		if err != nil {
			msg = err.Error()
		}
		o.Data["json"] = map[string]interface{}{"code": 500, "error": msg}
		o.ServeJSON()
		return
	}

	if err := infrastructure.UnscheduleExecution(executionID, selfID, req); err != nil {
		o.Data["json"] = map[string]interface{}{"code": 404, "error": err.Error()}
		o.ServeJSON()
		return
	}
	o.Data["json"] = map[string]interface{}{"code": 200, "error": ""}
	o.ServeJSON()
}
// SearchScheduledDraftOrder searches the order collection for entries whose
// "workflow_id" equals the ":id" path parameter and whose "order_by" equals
// the peer ID extracted from the request token, and serves the result as JSON.
//
// @Title SearchScheduledDraftOrder
// @Description search the orders matching a workflow id and the calling peer
// @Param id path string true "id execution"
// @Success 200 {workspace} models.workspace
// @router /:id/order [get]
func (o *WorkflowSchedulerController) SearchScheduledDraftOrder() {
	_, peerID, _ := oclib.ExtractTokenInfo(*o.Ctx.Request)
	byWorkflow := dbs.Filter{Operator: dbs.EQUAL.String(), Value: o.Ctx.Input.Param(":id")}
	byOrderer := dbs.Filter{Operator: dbs.EQUAL.String(), Value: peerID}

	// The search runs through an admin request, not with the caller's
	// user/groups.
	o.Data["json"] = oclib.NewRequestAdmin(orderCollection, nil).Search(&dbs.Filters{
		And: map[string][]dbs.Filter{
			"workflow_id": {byWorkflow},
			"order_by":    {byOrderer},
		},
	}, "", true)
	o.ServeJSON()
}

View File

@@ -1,270 +0,0 @@
package controllers
import (
"encoding/json"
"fmt"
"net/http"
"oc-scheduler/infrastructure"
"strings"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/dbs"
"cloud.o-forge.io/core/oc-lib/tools"
beego "github.com/beego/beego/v2/server/web"
"github.com/google/uuid"
gorillaws "github.com/gorilla/websocket"
)
// orderCollection is the oc-lib data type identifier of the ORDER collection;
// it is passed to oclib.NewRequestAdmin when searching orders.
var orderCollection = oclib.LibDataEnum(oclib.ORDER)
// logger is the package-level logger obtained from oc-lib.
var logger = oclib.GetLogger()
// WorkflowSchedulerController groups the HTTP operations about workflow
// scheduling. It embeds beego.Controller and declares no fields of its own.
type WorkflowSchedulerController struct {
beego.Controller
}
// Schedule decodes a WorkflowSchedule from the request body, assigns it a
// fresh UUID and schedules it for the workflow given by the ":id" path
// parameter. The JSON answer carries "data", "code" and "error":
//   - 422 when the body cannot be decoded into a schedule;
//   - 409 when scheduling fails (executions already created are deleted);
//   - 200 with the created workflow executions otherwise.
//
// @Title Schedule
// @Description schedule workflow
// @Param id path string true "id execution"
// @Param body body models.compute true "The compute content"
// @Success 200 {workspace} models.workspace
// @router /:id [post]
func (o *WorkflowSchedulerController) Schedule() {
	logger := oclib.GetLogger()
	user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
	wfId := o.Ctx.Input.Param(":id")

	// Reject undecodable bodies explicitly: resp stays nil on a decoding
	// error or a JSON "null" body, and dereferencing it below would panic.
	var resp *infrastructure.WorkflowSchedule
	if err := json.Unmarshal(o.Ctx.Input.CopyBody(100000), &resp); err != nil || resp == nil {
		reason := "empty schedule"
		if err != nil {
			reason = err.Error()
		}
		o.Data["json"] = map[string]interface{}{
			"data":  nil,
			"code":  422,
			"error": "Invalid schedule payload: " + reason,
		}
		o.ServeJSON()
		return
	}

	logger.Info().Msg("Booking for " + wfId)
	req := oclib.NewRequestAdmin(collection, nil)
	// req := oclib.NewRequest(collection, user, peerID, groups, caller)
	resp.UUID = uuid.New().String()
	sch, _, _, err := resp.Schedules(wfId, &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Caller:   nil,
		Admin:    true,
	})
	if err != nil {
		// Roll back the executions that were created before the failure.
		if sch != nil {
			for _, w := range sch.WorkflowExecution {
				req.DeleteOne(w.GetID())
			}
		}
		o.Data["json"] = map[string]interface{}{
			"data":  nil,
			"code":  409,
			"error": "Error when scheduling your execution(s): " + err.Error(),
		}
		o.ServeJSON()
		return
	}
	o.Data["json"] = map[string]interface{}{
		"data":  sch.WorkflowExecution,
		"code":  200,
		"error": "",
	}
	o.ServeJSON()
}
// wsUpgrader upgrades incoming HTTP requests to WebSocket connections.
// NOTE(review): CheckOrigin returns true for every request, so the Origin
// header is never validated here — confirm that origin filtering is enforced
// elsewhere before exposing this endpoint to browsers.
var wsUpgrader = gorillaws.Upgrader{
CheckOrigin: func(r *http.Request) bool { return true },
}
// CheckStream is the controller action for the availability stream; it only
// forwards the raw response writer and request to CheckStreamHandler.
//
// @Title CheckStream
// @Description WebSocket stream for slot availability checking.
// @Param id path string true "workflow id"
// @Param as_possible query bool false "search from now"
// @Param preemption query bool false "force-valid, surface warnings"
// @router /:id/check [get]
func (o *WorkflowSchedulerController) CheckStream() {
CheckStreamHandler(o.Ctx.ResponseWriter, o.Ctx.Request)
}
// CheckStreamHandler is the WebSocket handler for slot availability checking.
// It is invoked via the CheckStream controller method.
//
// The workflow ID is read from the URL path ("/oc/<id>/check").
// Query params: as_possible=true, preemption=true
//
// The first client message must be a WorkflowSchedule. A check result is
// pushed after that message, after every later client message, and after
// every planner or workflow notification. The stream ends when the client
// connection can no longer be read or when a check or write fails.
func CheckStreamHandler(w http.ResponseWriter, r *http.Request) {
	wfID := strings.TrimSuffix(
		strings.TrimPrefix(r.URL.Path, "/oc/"),
		"/check",
	)
	q := r.URL.Query()
	asap := q.Get("as_possible") == "true"
	preemption := q.Get("preemption") == "true"

	user, peerID, groups := oclib.ExtractTokenInfo(*r)
	req := &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Caller:   nil,
		Admin:    true,
	}

	// Resolve the peer IDs concerned by this workflow before upgrading so we
	// can abort cleanly with a plain HTTP error if the workflow is not found.
	watchedPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req)
	fmt.Println("Here my watched peers involved in workflow", watchedPeers)
	if err != nil {
		http.Error(w, `{"code":404,"error":"`+err.Error()+`"}`, http.StatusNotFound)
		return
	}

	// Upgrade to WebSocket.
	conn, err := wsUpgrader.Upgrade(w, r, nil)
	if err != nil {
		// gorilla already wrote the error response
		return
	}

	// Read the schedule parameters sent by the client as the first message.
	var ws infrastructure.WorkflowSchedule
	if err := conn.ReadJSON(&ws); err != nil {
		conn.Close()
		return
	}

	// Subscribe to planner updates for the initially resolved peers and to
	// workflow change notifications (peer list may change on workflow edit).
	plannerCh, plannerUnsub := infrastructure.SubscribePlannerUpdates(watchedPeers)
	wfCh, wfUnsub := infrastructure.SubscribeWorkflowUpdates(wfID)

	// Unique ID for this check session — used to track refresh ownership.
	sessionID := uuid.New().String()

	// Request a fresh planner snapshot for every concerned peer.
	// The first session to claim a peer becomes its refresh owner; others skip
	// the duplicate PB_PLANNER emission. ownedPeers grows if the workflow's
	// peer list changes (wfCh).
	ownedPeers := infrastructure.RequestPlannerRefresh(watchedPeers, sessionID)

	// Cleanup on exit (clean or forced): release refresh ownership for the
	// peers this session claimed, which resets Refreshing state and emits
	// PB_CLOSE_PLANNER so oc-discovery stops the planner stream.
	defer func() {
		conn.Close()
		plannerUnsub()
		wfUnsub()
		infrastructure.ReleaseRefreshOwnership(ownedPeers, sessionID)
	}()

	// push runs an availability check with the current parameters and sends
	// the result to the client.
	push := func() error {
		result, checkErr := ws.Check(wfID, asap, preemption, req)
		if checkErr != nil {
			return checkErr
		}
		return conn.WriteJSON(result)
	}

	// Initial check.
	if err := push(); err != nil {
		return
	}

	// Read loop: detect client-side close and parse schedule parameter
	// updates (date changes, booking mode changes, …) sent by the client.
	updateCh := make(chan infrastructure.WorkflowSchedule, 1)
	closeCh := make(chan struct{})
	go func() {
		defer close(closeCh)
		for {
			var updated infrastructure.WorkflowSchedule
			if err := conn.ReadJSON(&updated); err != nil {
				// Connection closed or unrecoverable read error.
				return
			}
			// Keep only the most recent update. The drain must be
			// non-blocking: the consumer may empty the buffer between the
			// failed send and the drain, and a blocking receive would then
			// hang this goroutine forever. This goroutine is the only
			// sender, so the final send cannot block.
			select {
			case updateCh <- updated:
			default:
				select {
				case <-updateCh:
				default:
					// The consumer took the pending update in the meantime.
				}
				updateCh <- updated
			}
		}
	}()

	// Stream loop.
	for {
		select {
		case updated := <-updateCh:
			// The client changed the requested date/params: adopt the new
			// schedule and re-run the check immediately.
			ws = updated
			if err := push(); err != nil {
				return
			}
		case <-wfCh:
			// The workflow was modified: refresh the peer list and re-subscribe
			// so the stream watches the correct set of planners going forward.
			if newPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req); err == nil {
				plannerUnsub()
				watchedPeers = newPeers
				plannerCh, plannerUnsub = infrastructure.SubscribePlannerUpdates(newPeers)
				// Claim refresh ownership for any newly added peers.
				newOwned := infrastructure.RequestPlannerRefresh(newPeers, sessionID)
				ownedPeers = append(ownedPeers, newOwned...)
			}
			if err := push(); err != nil {
				return
			}
		case <-plannerCh:
			// A planner snapshot arrived (or was evicted): re-evaluate.
			if err := push(); err != nil {
				return
			}
		case <-closeCh:
			return
		}
	}
}
// UnSchedule does not unschedule anything yet (see the TODO below): it only
// searches the collection for entries whose "workflow_id" equals the ":id"
// path parameter and serves the result as JSON.
//
// @Title UnSchedule
// @Description schedule workflow
// @Param id path string true "id execution"
// @Param body body models.compute true "The compute content"
// @Success 200 {workspace} models.workspace
// @router /:id [delete]
func (o *WorkflowSchedulerController) UnSchedule() {
	// TODO UNSCHEDULER
	workflowID := o.Ctx.Input.Param(":id")
	byWorkflow := map[string][]dbs.Filter{
		"workflow_id": {{Operator: dbs.EQUAL.String(), Value: workflowID}},
	}
	// The search runs through an admin request; the caller's token is not used.
	o.Data["json"] = oclib.NewRequestAdmin(collection, nil).Search(&dbs.Filters{And: byWorkflow}, "", true)
	o.ServeJSON()
}
// SearchScheduledDraftOrder searches the order collection for entries whose
// "workflow_id" equals the ":id" path parameter and whose "order_by" equals
// the peer ID extracted from the request token, and serves the result as JSON.
//
// @Title SearchScheduledDraftOrder
// @Description schedule workflow
// @Param id path string true "id execution"
// @Success 200 {workspace} models.workspace
// @router /:id/order [get]
func (o *WorkflowSchedulerController) SearchScheduledDraftOrder() {
	// Only the peer ID is needed; the caller's identity is no longer printed
	// to stdout.
	_, peerID, _ := oclib.ExtractTokenInfo(*o.Ctx.Request)
	id := o.Ctx.Input.Param(":id")
	filter := &dbs.Filters{
		And: map[string][]dbs.Filter{
			"workflow_id": {{Operator: dbs.EQUAL.String(), Value: id}},
			"order_by":    {{Operator: dbs.EQUAL.String(), Value: peerID}},
		},
	}
	// The search runs through an admin request, not with the caller's
	// user/groups.
	o.Data["json"] = oclib.NewRequestAdmin(orderCollection, nil).Search(filter, "", true)
	o.ServeJSON()
}

6
go.mod
View File

@@ -3,7 +3,7 @@ module oc-scheduler
go 1.25.0
require (
cloud.o-forge.io/core/oc-lib v0.0.0-20260312105633-a30173921f67
cloud.o-forge.io/core/oc-lib v0.0.0-20260317090440-1ac735cef10e
github.com/beego/beego/v2 v2.3.8
github.com/google/uuid v1.6.0
github.com/robfig/cron v1.2.0
@@ -61,7 +61,6 @@ require (
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/libp2p/go-libp2p/core v0.43.0-rc2 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
@@ -69,7 +68,7 @@ require (
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/montanaflynn/stats v0.7.1 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nats-io/nats.go v1.44.0 // indirect
github.com/nats-io/nats.go v1.44.0
github.com/nats-io/nkeys v0.4.11 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
@@ -77,7 +76,6 @@ require (
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.65.0 // indirect
github.com/prometheus/procfs v0.17.0 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/rs/zerolog v1.34.0 // indirect
github.com/shiena/ansicolor v0.0.0-20230509054315-a9deabde6e02 // indirect
github.com/smartystreets/assertions v1.2.0 // indirect

99
go.sum
View File

@@ -1,34 +1,36 @@
cloud.o-forge.io/core/oc-lib v0.0.0-20260223141827-5d32b4646a86 h1:/7XYbCzzo062lYbyBM3MA7KLrJII9iCQzvw4T5g/4oY=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223141827-5d32b4646a86/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223142248-b08bbf51ddc5 h1:qxLz4rrFxB1dmJa0/Q6AWBwQgmVt7LVXB0RgwpGYeXE=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223142248-b08bbf51ddc5/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223144148-f28e2c362020 h1:F7Ifw3WgtCnDur1p5+EuFZrM9yy7KSWoIyDQ8opQE90=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223144148-f28e2c362020/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223145010-e10bb5545561 h1:q5m2UMsEgrfN0OJsoa4Sme0v4OO1pnIt8OsAwdL+5/A=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223145010-e10bb5545561/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223145640-e039fa56b64c h1:3PRvQdSSGjmw+Txkf0zWs3F+V9URq22zQCLR3o7bNBY=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223145640-e039fa56b64c/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223162637-ff830065ec27 h1:cw3R1/Ivlr3W1XZ2cCHRrLB6UG/3dhdvG0i+P5W1tYc=
cloud.o-forge.io/core/oc-lib v0.0.0-20260223162637-ff830065ec27/go.mod h1:jmyBwmsac/4V7XPL347qawF60JsBCDmNAMfn/ySXKYo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260224092928-54aef164ba10 h1:9i8fDtGjg3JDniCO7VGtkd8zHXWze7OJ3tvO4mZnBmY=
cloud.o-forge.io/core/oc-lib v0.0.0-20260224092928-54aef164ba10/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260224093610-a9ebad78f3a8 h1:xoC5PAz1469QxrNm8rrsq5+BtwshEt+L2Nhf90MrqrM=
cloud.o-forge.io/core/oc-lib v0.0.0-20260224093610-a9ebad78f3a8/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa h1:1wCpI4dwN1pj6MlpJ7/WifhHVHmCE4RU+9klwqgo/bk=
cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260311072518-933b7147e908 h1:1jz3xI/u2FzCG8phY7ShqADrmCj0mlrdjbdNUosSwgs=
cloud.o-forge.io/core/oc-lib v0.0.0-20260311072518-933b7147e908/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260311084029-97bfb0582a99 h1:60BGJeR9uvpDwvNeWqVBnB2JjWLOZv16sUGZjzXSQlg=
cloud.o-forge.io/core/oc-lib v0.0.0-20260311084029-97bfb0582a99/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312073634-2c9c42dd516a h1:oCkb9l/Cvn0x6iicxIydrjfCNU+UHhKuklFgfzDa174=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312073634-2c9c42dd516a/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312083310-f5e199132416 h1:QHR5pzCI/HUawu8pst5Ggio6WPCUUf8XYjNMVk8kSqo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312083310-f5e199132416/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312104524-e28b79ac0d62 h1:sHzacZxPIKHyjL4EkgG/c7MI8gM1xmLdhaoUx2ZsH+M=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312104524-e28b79ac0d62/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312105633-a30173921f67 h1:x6klvxiRpU1KcvmygIcHGDHFW3CbWC05El6Fryvr3uo=
cloud.o-forge.io/core/oc-lib v0.0.0-20260312105633-a30173921f67/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260112144037-c35b06e0bc3c h1:9lXrj1agE1clFfxOXRrVXi4PEvlAuWKb4z977c2uk4k=
cloud.o-forge.io/core/oc-lib v0.0.0-20260112144037-c35b06e0bc3c/go.mod h1:vHWauJsS6ryf7UDqq8hRXoYD5RsONxcFTxeZPOztEuI=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316075231-465b91fd6ecb h1:yO8KQpNHYIv4O6LrkRacFsTQrLv5qYYeO8KD1e1eunA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316075231-465b91fd6ecb/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316082848-9e5102893f8a h1:4HHebXbTSzkD1MG/1GU5kZx45xx9IQ0sibndPuarlp0=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316082848-9e5102893f8a/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316094939-48f034316b91 h1:wm4oeR1mQE1esHAte9dYB8HC+pjY+G7zwfgQUxPO5g8=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316094939-48f034316b91/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316095931-a86e78841b34 h1:OxxfSNhdkqX165YzurzicnrU55s6n4pZjOg+HmkDzUc=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316095931-a86e78841b34/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316104105-deb819c5af95 h1:efOmy48+aw8vGGqHHUfrxVQJq0TlIux0/4aDcH7Wcpw=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316104105-deb819c5af95/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316104558-4a076ba23738 h1:L/xd9d1MCyhwQLwiuaAzID7pRUnotikGSe7jhSqtqPs=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316104558-4a076ba23738/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316104751-40a986af41b8 h1:02FkLYGjbGp/gq8Ej31KIXwF8QaQzJG/IODQt6GogT8=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316104751-40a986af41b8/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316113239-6d8efd137ac5 h1:NF+TYU0it9cWsrTGngv9KVGgrglMCO522/huR2RJNu0=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316113239-6d8efd137ac5/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316114821-9bf2c566e922 h1:B1DzkKyidaSLC7cdJ3jg+kQR9gU20DlGS+KjI8SmlDg=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316114821-9bf2c566e922/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316121650-a4d81cbb67f4 h1:k/xjsnRPIQjoaXp59x0CdwncpJa8KV7Fiyf78fgx7Ao=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316121650-a4d81cbb67f4/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316145919-b110cbc260c9 h1:+Yk9oHpChZhQYce2GY3HnFfW6AdeYAO31kczhwwpKgc=
cloud.o-forge.io/core/oc-lib v0.0.0-20260316145919-b110cbc260c9/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260317080147-9b2f9451767e h1:tKipQ9WFDJZXgex5MORwI3v0lJsEPaHHeIJqVWA3Vzk=
cloud.o-forge.io/core/oc-lib v0.0.0-20260317080147-9b2f9451767e/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260317083202-65237f0d1f3f h1:X8ytAjBzEqnFL1YQnjm9Ol/aoCiU/H6IgdzX74ZhFig=
cloud.o-forge.io/core/oc-lib v0.0.0-20260317083202-65237f0d1f3f/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260317090440-1ac735cef10e h1:e/oYMPAqD27l3Rd473Xny/2Ut/LZnBYXAzfQArNOmrs=
cloud.o-forge.io/core/oc-lib v0.0.0-20260317090440-1ac735cef10e/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/beego/beego/v2 v2.3.8 h1:wplhB1pF4TxR+2SS4PUej8eDoH4xGfxuHfS7wAk9VBc=
github.com/beego/beego/v2 v2.3.8/go.mod h1:8vl9+RrXqvodrl9C8yivX1e6le6deCK6RWeq8R7gTTg=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -77,6 +79,8 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4=
github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
@@ -85,14 +89,14 @@ github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7O
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/goraz/onion v0.1.3 h1:KhyvbDA2b70gcz/d5izfwTiOH8SmrvV43AsVzpng3n0=
github.com/goraz/onion v0.1.3/go.mod h1:XEmz1XoBz+wxTgWB8NwuvRm4RAu3vKxvrmYtzK+XCuQ=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c=
@@ -176,6 +180,10 @@ github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVx
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/ogier/pflag v0.0.1/go.mod h1:zkFki7tvTa0tafRvTBIZTvzYyAu6kQhPZFnshFFPE+g=
github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/pelletier/go-toml v1.6.0/go.mod h1:5N711Q9dKgbdkxHL+MEfF31hpT7l0S0s/t2kKREewys=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -191,8 +199,7 @@ github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7D
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
github.com/robfig/cron v1.2.0 h1:ZjScXvvxeQ63Dbyxy76Fj3AT3Ut0aKsyd2/tl3DTMuQ=
github.com/robfig/cron v1.2.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
@@ -208,15 +215,19 @@ github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hg
github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
@@ -239,28 +250,24 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM=
golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY=
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 h1:bsqhLWFR6G6xiQcb+JoGqdKdRU6WzPWmK8E0jxTjzo4=
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -273,8 +280,6 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -285,8 +290,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
@@ -295,9 +298,9 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

343
infrastructure/check.go Normal file
View File

@@ -0,0 +1,343 @@
package infrastructure
import (
"errors"
"fmt"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/resources"
"cloud.o-forge.io/core/oc-lib/models/workflow"
"cloud.o-forge.io/core/oc-lib/tools"
)
// ---------------------------------------------------------------------------
// Slot availability check
// ---------------------------------------------------------------------------
// Tunables for the slot availability scan performed by Check and findNextSlot.
const (
checkWindowHours = 5 // how far ahead to scan for a free slot (hours)
checkStepMin = 15 // time increment per scan step (minutes)
// asapBuffer is the minimum lead time added to time.Now() for as_possible
// and WHEN_POSSIBLE bookings. It absorbs NATS propagation + p2p stream
// latency so the ExpectedStartDate never arrives already in the past at
// the destination peer.
asapBuffer = 2 * time.Minute
)
// CheckResult holds the outcome of a slot availability check.
type CheckResult struct {
// Available reports whether the slot can be scheduled: no resource
// conflict was found, or the check ran in preemption mode.
Available bool `json:"available"`
// Start is the resolved start of the checked slot.
Start time.Time `json:"start"`
// End is the resolved end of the checked slot; nil when no end could be
// determined (open-ended slot).
End *time.Time `json:"end,omitempty"`
// NextSlot is the nearest free slot found within checkWindowHours when
// the requested slot is unavailable, or the preferred (conflict-free) slot
// when running in preemption mode.
NextSlot *time.Time `json:"next_slot,omitempty"`
// Warnings lists human-readable messages about conflicting resources and
// peers whose planner was not available in the cache.
Warnings []string `json:"warnings,omitempty"`
// Preemptible is true when the check was run in preemption mode.
Preemptible bool `json:"preemptible,omitempty"`
// SchedulingID is the session identifier the client must supply to Schedule
// in order to confirm the draft bookings created during this Check session.
SchedulingID string `json:"scheduling_id,omitempty"`
}
// bookingResource is the minimum info needed to verify a resource against the
// planner cache: which resource, on which peer's planner, and (optionally)
// which instance of that resource.
type bookingResource struct {
id string // resource MongoDB _id
peerPID string // peer public PeerID (PID) — PlannerCache key
instanceID string // resolved from WorkflowSchedule.SelectedInstances
}
// Check verifies that all booking-relevant resources (storage and compute) of
// the given workflow have capacity for the requested time slot.
//
//   - asap=true  → ignore ws.Start and begin at time.Now()+asapBuffer (the
//     same fallback applies when ws.Start is the zero time)
//   - preemption → always return Available=true but populate Warnings with
//     conflicts and NextSlot with the nearest conflict-free alternative
//
// The slot end is taken from ws.End, otherwise derived from ws.DurationS,
// otherwise estimated via Workflow.Planify; it stays nil (open-ended) when
// none of these yields a value.
//
// An error is returned only when the workflow cannot be loaded.
func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) {
	// 1. Load workflow.
	obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
	if code != 200 || err != nil {
		msg := "could not load workflow " + wfID
		if err != nil {
			msg += ": " + err.Error()
		}
		return nil, errors.New(msg)
	}
	// Checked assertion: a nil or foreign object must not panic the handler.
	wf, ok := obj.(*workflow.Workflow)
	if !ok || wf == nil {
		return nil, errors.New("could not load workflow " + wfID + ": unexpected object type")
	}

	// 2. Resolve start.
	start := ws.Start
	if asap || start.IsZero() {
		start = time.Now().Add(asapBuffer)
	}

	// 3. Resolve end: use explicit end/duration, or estimate via Planify.
	end := ws.End
	if end == nil {
		if ws.DurationS > 0 {
			e := start.Add(time.Duration(ws.DurationS * float64(time.Second)))
			end = &e
		} else {
			_, longest, _, _, planErr := wf.Planify(
				start, nil,
				ws.SelectedInstances, ws.SelectedPartnerships,
				ws.SelectedBuyings, ws.SelectedStrategies,
				int(ws.BookingMode), request,
			)
			// A failed estimate is not fatal: the slot is checked open-ended.
			if planErr == nil && longest > 0 {
				e := start.Add(time.Duration(longest) * time.Second)
				end = &e
			}
		}
	}

	// 4. Extract booking-relevant (storage + compute) resources from the graph,
	// resolving the selected instance for each resource.
	checkables := collectBookingResources(wf, ws.SelectedInstances)

	// 5. Check every resource against its peer's planner.
	unavailable, warnings := checkResourceAvailability(checkables, start, end)
	conflict := len(unavailable) > 0

	// 6. Build the result. Preemption marks the slot schedulable regardless of
	// conflicts; in every mode a conflict triggers the search for the nearest
	// conflict-free alternative within the scan window.
	result := &CheckResult{
		Available:   preemption || !conflict,
		Start:       start,
		End:         end,
		Warnings:    warnings,
		Preemptible: preemption,
	}
	if conflict {
		result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
	}
	return result, nil
}
// collectBookingResources returns the unique storage and compute resources of
// the workflow graph, keyed by resource ID. For each resource the selected
// instance ID is resolved from selectedInstances (the scheduler's
// SelectedInstances ConfigItem) so the planner check targets the exact
// instance chosen by the user.
//
// Each entry carries the owning peer's public PeerID in peerPID. Resources
// whose creator peer cannot be resolved are skipped. The map is keyed by
// resource ID (not by peer) so that several resources hosted by the same peer
// are all retained and checked. Returns nil when the workflow has no graph.
func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) map[string]bookingResource {
	if wf == nil || wf.Graph == nil {
		return nil
	}

	// bookable is the subset of the resource API needed here.
	type bookable interface {
		GetID() string
		GetCreatorID() string
	}

	result := map[string]bookingResource{}

	// Resolve MongoDB peer _id (DID) → public PeerID (PID) used as PlannerCache key.
	// Successful lookups are memoised for the duration of this call.
	peerAccess := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
	didToPID := map[string]string{}
	resolvePID := func(did string) string {
		if pid, ok := didToPID[did]; ok {
			return pid
		}
		if data := peerAccess.LoadOne(did); data.Data != nil {
			if p := data.ToPeer(); p != nil {
				didToPID[did] = p.PeerID
				return p.PeerID
			}
		}
		return ""
	}

	// resolveInstanceID returns the ID of the instance selected for res, or ""
	// when the resource kind is not bookable or no instance can be resolved.
	resolveInstanceID := func(res bookable) string {
		idx := selectedInstances.Get(res.GetID())
		switch r := res.(type) {
		case *resources.StorageResource:
			if inst := r.GetSelectedInstance(idx); inst != nil {
				return inst.GetID()
			}
		case *resources.ComputeResource:
			if inst := r.GetSelectedInstance(idx); inst != nil {
				return inst.GetID()
			}
		}
		return ""
	}

	// add records res once; a resource whose peer cannot be resolved is left
	// out (and retried if it shows up again in the graph).
	add := func(res bookable) {
		id := res.GetID()
		if _, done := result[id]; done {
			return
		}
		pid := resolvePID(res.GetCreatorID())
		if pid == "" {
			return
		}
		result[id] = bookingResource{
			id:         id,
			peerPID:    pid,
			instanceID: resolveInstanceID(res),
		}
	}

	for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
		i := item
		if _, res := i.GetResource(); res != nil {
			add(res)
		}
	}
	for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
		i := item
		if _, res := i.GetResource(); res != nil {
			add(res)
		}
	}
	return result
}
// checkResourceAvailability looks up every resource in its peer's cached
// planner and reports the IDs of those that are busy in [start, end], together
// with human-readable warnings. A resource whose peer has no cached planner
// is assumed available and only produces a warning.
func checkResourceAvailability(res map[string]bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) {
	for _, candidate := range res {
		// Hold the read lock only for the cache lookup, not for the check.
		plannerMu.RLock()
		cached := PlannerCache[candidate.peerPID]
		plannerMu.RUnlock()

		switch {
		case cached == nil || cached.Planner == nil:
			warnings = append(warnings, fmt.Sprintf(
				"peer %s planner not in cache for resource %s assuming available", candidate.peerPID, candidate.id))
		case !checkInstance(cached.Planner, candidate.id, candidate.instanceID, start, end):
			unavailable = append(unavailable, candidate.id)
			warnings = append(warnings, fmt.Sprintf(
				"resource %s is not available in [%s %s]",
				candidate.id, start.Format(time.RFC3339), formatOptTime(end)))
		}
	}
	return unavailable, warnings
}
// checkInstance reports whether resourceID is free in [start, end] according
// to planner p. With a non-empty instanceID only that instance is checked.
// Otherwise every instance recorded in p.Capacities for the resource is tried
// and the first free one wins; a resource with no recorded capacity is
// treated as free.
func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool {
	if instanceID != "" {
		return p.Check(resourceID, instanceID, nil, start, end)
	}

	// No instance selected: accept if any known instance has free capacity.
	known := p.Capacities[resourceID]
	if len(known) == 0 {
		return true // no recorded usage → assume free
	}
	for candidate := range known {
		if p.Check(resourceID, candidate, nil, start, end) {
			return true
		}
	}
	return false
}
// findNextSlot probes candidate start times after 'from', one checkStepMin
// step at a time and strictly before from+windowH hours, and returns the first
// one at which every resource is free for the whole slot. The slot length is
// originalEnd-from when that is positive, one hour otherwise. Returns nil when
// no candidate fits inside the window.
func findNextSlot(resources map[string]bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time {
	span := time.Hour
	if originalEnd != nil {
		if requested := originalEnd.Sub(from); requested > 0 {
			span = requested
		}
	}

	const step = checkStepMin * time.Minute
	deadline := from.Add(time.Duration(windowH) * time.Hour)

	candidate := from.Add(step)
	for candidate.Before(deadline) {
		slotEnd := candidate.Add(span)
		if busy, _ := checkResourceAvailability(resources, candidate, &slotEnd); len(busy) == 0 {
			return &candidate
		}
		candidate = candidate.Add(step)
	}
	return nil
}
// formatOptTime renders an optional timestamp in RFC 3339 form, or "open"
// when t is nil (open-ended slot).
func formatOptTime(t *time.Time) string {
	if t != nil {
		return t.Format(time.RFC3339)
	}
	return "open"
}
// GetWorkflowPeerIDs loads the workflow and returns the public PeerIDs of the
// creator peers of all its storage and compute resources.
// These are the peers whose planners must be watched by a check stream.
//
// Creator IDs are deduplicated before being resolved to PeerIDs; creators that
// cannot be loaded or converted to a peer are silently left out. Returns
// (nil, nil) when the workflow has no graph, and an error when the workflow
// itself cannot be loaded.
func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) {
	obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
	if code != 200 || err != nil {
		msg := "could not load workflow " + wfID
		if err != nil {
			msg += ": " + err.Error()
		}
		return nil, errors.New(msg)
	}
	// Checked assertion: a nil or foreign object must not panic the caller.
	wf, ok := obj.(*workflow.Workflow)
	if !ok || wf == nil {
		return nil, errors.New("could not load workflow " + wfID + ": unexpected object type")
	}
	if wf.Graph == nil {
		return nil, nil
	}

	// Collect each non-empty creator ID once, preserving graph order.
	seen := map[string]bool{}
	var creatorIDs []string
	remember := func(id string) {
		if id != "" && !seen[id] {
			seen[id] = true
			creatorIDs = append(creatorIDs, id)
		}
	}
	for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
		i := item
		if _, res := i.GetResource(); res != nil {
			remember(res.GetCreatorID())
		}
	}
	for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
		i := item
		if _, res := i.GetResource(); res != nil {
			remember(res.GetCreatorID())
		}
	}

	// Translate creator IDs into public PeerIDs.
	realPeersID := make([]string, 0, len(creatorIDs))
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
	for _, id := range creatorIDs {
		data := access.LoadOne(id)
		if data.Data == nil {
			continue
		}
		// ToPeer may yield nil for unexpected data; skip instead of panicking.
		if p := data.ToPeer(); p != nil {
			realPeersID = append(realPeersID, p.PeerID)
		}
	}
	return realPeersID, nil
}

197
infrastructure/considers.go Normal file
View File

@@ -0,0 +1,197 @@
package infrastructure
import (
"encoding/json"
"fmt"
"sync"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/common/enum"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/models/workflow"
"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
"cloud.o-forge.io/core/oc-lib/tools"
"oc-scheduler/infrastructure/scheduling"
)
// executionConsidersPayload is the JSON body carried by Considers messages.
type executionConsidersPayload struct {
// ID identifies the object the message is about: a booking or purchase ID
// for BOOKING / PURCHASE_RESOURCE messages, the execution ID for
// WORKFLOW_EXECUTION messages.
ID string `json:"id"`
// ExecutionsID is copied from the execution's ExecutionsID field.
ExecutionsID string `json:"executions_id"`
// ExecutionID is the ID of the workflow execution concerned.
ExecutionID string `json:"execution_id"`
// PeerIDs lists the peers involved in the workflow (see GetWorkflowPeerIDs).
PeerIDs []string `json:"peer_ids"`
}
// ---------------------------------------------------------------------------
// Per-execution mutex map (replaces the global stateMu)
// ---------------------------------------------------------------------------
// execLocksMu guards execLocks.
var execLocksMu sync.RWMutex
var execLocks = map[string]*sync.Mutex{} // executionID → per-execution mutex

// RegisterExecLock makes sure a mutex entry exists for the execution. Called
// when a new execution draft is persisted.
//
// Registration is idempotent: an existing mutex is kept, because replacing it
// while another goroutine holds it would let two updates of the same
// execution run concurrently.
func RegisterExecLock(executionID string) {
	execLocksMu.Lock()
	defer execLocksMu.Unlock()
	if _, registered := execLocks[executionID]; !registered {
		execLocks[executionID] = &sync.Mutex{}
	}
}

// UnregisterExecLock removes the mutex entry. Called on unschedule and
// execution deletion. Unregistering an unknown execution is a no-op.
func UnregisterExecLock(executionID string) {
	execLocksMu.Lock()
	defer execLocksMu.Unlock()
	delete(execLocks, executionID)
}
// applyConsidersLocal marks a locally confirmed booking or purchase as
// considered without going through NATS: it wraps id in a considers payload
// and hands it straight to updateExecutionState, which resolves the execution
// from the resource itself. Nothing happens if the payload cannot be encoded.
func applyConsidersLocal(id string, dt tools.DataType) {
	if payload, err := json.Marshal(&executionConsidersPayload{ID: id}); err == nil {
		updateExecutionState(payload, dt)
	}
}
// EmitConsidersExecution publishes a Considers / WORKFLOW_EXECUTION message on
// the PROPALGATION_EVENT NATS subject, addressed to the storage and compute
// peers of wf, once the execution has transitioned to SCHEDULED. Each
// receiving peer will use it to confirm (IsDraft=false) their local drafts.
//
// The call is best-effort: it returns silently when wf has no graph, when no
// peer can be resolved, or when encoding fails.
func EmitConsidersExecution(exec *workflow_execution.WorkflowExecution, wf *workflow.Workflow) {
	if wf == nil || wf.Graph == nil {
		return
	}
	peerIDs, err := GetWorkflowPeerIDs(wf.GetID(), &tools.APIRequest{Admin: true})
	if err != nil || len(peerIDs) == 0 {
		return
	}

	execID := exec.GetID()
	inner, err := json.Marshal(executionConsidersPayload{
		ID:           execID,
		ExecutionID:  execID,
		ExecutionsID: exec.ExecutionsID,
		PeerIDs:      peerIDs,
	})
	if err != nil {
		return
	}

	envelope, err := json.Marshal(tools.PropalgationMessage{
		DataType: int(tools.WORKFLOW_EXECUTION),
		Action:   tools.PB_CONSIDERS,
		Payload:  inner,
	})
	if err != nil {
		return
	}

	tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
		FromApp:  "oc-scheduler",
		Datatype: tools.WORKFLOW_EXECUTION,
		Method:   int(tools.PROPALGATION_EVENT),
		Payload:  envelope,
	})
}
// updateExecutionState sets BookingsState[id]=true (dt==BOOKING) or
// PurchasesState[id]=true (dt==PURCHASE_RESOURCE) on the execution that owns
// the booking/purchase.
//
// payload must be JSON-encoded and carry at least {"id":"..."}: the owning
// execution is resolved from the stored booking/purchase itself, not from the
// payload. The update runs under the per-execution mutex registered with
// RegisterExecLock; when no mutex is registered the update is skipped.
//
// Once every booking and purchase state is true, the execution is switched to
// SCHEDULED (IsDraft=false), the session order is confirmed and the involved
// peers are notified through EmitConsidersExecution.
func updateExecutionState(payload []byte, dt tools.DataType) {
	var data executionConsidersPayload
	if err := json.Unmarshal(payload, &data); err != nil || data.ID == "" {
		return
	}

	// Resolve the owning execution from the stored booking/purchase.
	schdata := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).LoadOne(data.ID)
	if schdata.Data == nil {
		return
	}
	sch := scheduling.ToSchedulerObject(dt, schdata.Data)
	if sch == nil {
		return
	}
	execID := sch.GetExecutionId()

	// Serialise concurrent updates of the same execution.
	execLocksMu.RLock()
	mu := execLocks[execID]
	execLocksMu.RUnlock()
	if mu == nil {
		fmt.Printf("updateExecutionState: no lock for execution %s, skipping\n", execID)
		return
	}
	mu.Lock()
	defer mu.Unlock()

	adminReq := &tools.APIRequest{Admin: true}
	res, _, err := workflow_execution.NewAccessor(adminReq).LoadOne(execID)
	if err != nil || res == nil {
		// Report the resolved ID: data.ExecutionID may be empty on the local path.
		fmt.Printf("updateExecutionState: could not load execution %s: %v\n", execID, err)
		return
	}
	exec, ok := res.(*workflow_execution.WorkflowExecution)
	if !ok || exec == nil {
		fmt.Printf("updateExecutionState: unexpected object type for execution %s\n", execID)
		return
	}

	switch dt {
	case tools.BOOKING:
		if exec.BookingsState == nil {
			exec.BookingsState = map[string]bool{}
		}
		exec.BookingsState[data.ID] = true
	case tools.PURCHASE_RESOURCE:
		if exec.PurchasesState == nil {
			exec.PurchasesState = map[string]bool{}
		}
		exec.PurchasesState[data.ID] = true
	}

	// The execution is fully confirmed once no state is left pending.
	allTrue := func(states map[string]bool) bool {
		for _, confirmed := range states {
			if !confirmed {
				return false
			}
		}
		return true
	}
	allConfirmed := allTrue(exec.BookingsState) && allTrue(exec.PurchasesState)
	if allConfirmed {
		exec.State = enum.SCHEDULED
		exec.IsDraft = false
	}

	if _, _, err := utils.GenericRawUpdateOne(exec, exec.GetID(), workflow_execution.NewAccessor(adminReq)); err != nil {
		fmt.Printf("updateExecutionState: could not update execution %s: %v\n", execID, err)
		return
	}

	if allConfirmed {
		// Confirm the order and notify all peers that execution is scheduled.
		go confirmSessionOrder(exec.ExecutionsID, adminReq)
		obj, _, err := workflow.NewAccessor(adminReq).LoadOne(exec.WorkflowID)
		if err == nil && obj != nil {
			if wf, ok := obj.(*workflow.Workflow); ok {
				go EmitConsidersExecution(exec, wf)
			}
		}
	}
}
// confirmExecutionDrafts handles a Considers/WORKFLOW_EXECUTION message
// received from oc-discovery, i.e. the originating peer has confirmed the
// execution as SCHEDULED. It loads the execution named in the payload and
// confirms, each in its own goroutine, every booking and purchase listed in
// the execution's states (IsDraft=false).
func confirmExecutionDrafts(payload []byte) {
	var msg executionConsidersPayload
	if err := json.Unmarshal(payload, &msg); err != nil {
		fmt.Printf("confirmExecutionDrafts: could not parse payload: %v\n", err)
		return
	}

	access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.WORKFLOW_EXECUTION), nil)
	loaded := access.LoadOne(msg.ExecutionID)
	exec := loaded.ToWorkflowExecution()
	if exec == nil {
		return // unknown execution: nothing to confirm
	}

	for bookingID := range exec.BookingsState {
		go confirmResource(bookingID, tools.BOOKING)
	}
	for purchaseID := range exec.PurchasesState {
		go confirmResource(purchaseID, tools.PURCHASE_RESOURCE)
	}
}

View File

@@ -5,155 +5,18 @@ import (
"encoding/json"
"fmt"
"oc-scheduler/conf"
"slices"
"sync"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/config"
"cloud.o-forge.io/core/oc-lib/models/booking"
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/common/enum"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/models/workflow"
"cloud.o-forge.io/core/oc-lib/models/workflow/graph"
"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
"cloud.o-forge.io/core/oc-lib/tools"
"github.com/nats-io/nats.go"
)
// plannerTTL is how long a peer's entry stays in PlannerCache, counted from
// its first insertion, before evictAfter removes it.
const plannerTTL = 24 * time.Hour
// ---------------------------------------------------------------------------
// Planner cache — protected by plannerMu
// ---------------------------------------------------------------------------
// plannerEntry wraps a planner snapshot with refresh-ownership tracking.
// At most one check session may be the "refresh owner" of a given peer's
// planner at a time: it emits PB_PLANNER to request a fresh snapshot from
// oc-discovery and, on close (clean or forced), emits PB_CLOSE_PLANNER to
// release the stream. Any subsequent session that needs the same peer's
// planner will see Refreshing=true and skip the duplicate request.
type plannerEntry struct {
// Planner is the latest snapshot stored for the peer; nil until one has
// been stored.
Planner *planner.Planner
Refreshing bool // true while a PB_PLANNER request is in flight
RefreshOwner string // session UUID that initiated the current refresh
}
// plannerMu guards PlannerCache and plannerAddedAt.
var plannerMu sync.RWMutex
// PlannerCache holds the latest planner entry of each peer, keyed by peer ID.
var PlannerCache = map[string]*plannerEntry{}
var plannerAddedAt = map[string]time.Time{} // peerID → first-seen timestamp
// ---------------------------------------------------------------------------
// Subscriber registries — one keyed by peerID, one by workflowID
// ---------------------------------------------------------------------------
// subsMu guards both subscriber registries below.
var subsMu sync.RWMutex
var plannerSubs = map[string][]chan struct{}{} // peerID → notification channels
var workflowSubs = map[string][]chan struct{}{} // workflowID → notification channels
// SubscribePlannerUpdates subscribes the caller to planner changes of the
// given peers. The returned channel gets one struct{} (sent without blocking)
// whenever any of those planners is updated; invoke the returned cancel
// function to unsubscribe.
func SubscribePlannerUpdates(peerIDs []string) (<-chan struct{}, func()) {
	updates, cancel := subscribe(&subsMu, plannerSubs, peerIDs)
	return updates, cancel
}
// SubscribeWorkflowUpdates subscribes the caller to modifications of the given
// workflow. The returned channel is signalled when the workflow changes (its
// peer list may have grown or shrunk); invoke the returned cancel function to
// unsubscribe.
func SubscribeWorkflowUpdates(wfID string) (<-chan struct{}, func()) {
	return subscribe(&subsMu, workflowSubs, []string{wfID})
}
// subscribe is the generic helper used by both registries. It creates a
// notification channel with a buffer of one, appends it to registry under
// every key in keys, and returns it together with a cancel function that
// removes it again. mu must be the lock guarding registry.
//
// cancel never edits a registered slice in place: notify iterates over a
// slice it read under the lock after releasing that lock, so removal builds a
// fresh slice instead of shifting elements inside the shared backing array.
// Keys left without subscribers are deleted so the registry does not grow
// without bound. Calling cancel more than once is harmless.
func subscribe(mu *sync.RWMutex, registry map[string][]chan struct{}, keys []string) (<-chan struct{}, func()) {
	ch := make(chan struct{}, 1)

	mu.Lock()
	for _, k := range keys {
		registry[k] = append(registry[k], ch)
	}
	mu.Unlock()

	cancel := func() {
		mu.Lock()
		defer mu.Unlock()
		for _, k := range keys {
			subs := registry[k]
			for i, s := range subs {
				if s != ch {
					continue
				}
				if len(subs) == 1 {
					delete(registry, k)
					break
				}
				// Copy-on-remove: leave the old backing array untouched.
				remaining := make([]chan struct{}, 0, len(subs)-1)
				remaining = append(remaining, subs[:i]...)
				remaining = append(remaining, subs[i+1:]...)
				registry[k] = remaining
				break
			}
		}
	}
	return ch, cancel
}
// notifyPlannerWatchers signals every channel subscribed to planner updates
// of peerID.
func notifyPlannerWatchers(peerID string) {
notify(&subsMu, plannerSubs, peerID)
}
// notifyWorkflowWatchers signals every channel subscribed to updates of
// workflow wfID.
func notifyWorkflowWatchers(wfID string) {
notify(&subsMu, workflowSubs, wfID)
}
// notify sends one non-blocking signal to every channel registered under key.
// The subscriber list is read under mu's read lock and the sends happen after
// the lock is released; a channel that cannot accept the signal right away
// (full buffer, no receiver) is skipped.
func notify(mu *sync.RWMutex, registry map[string][]chan struct{}, key string) {
	mu.RLock()
	watchers := registry[key]
	mu.RUnlock()

	for i := range watchers {
		select {
		case watchers[i] <- struct{}{}:
		default:
			// Signal dropped: the watcher cannot take it now.
		}
	}
}
// ---------------------------------------------------------------------------
// Cache helpers
// ---------------------------------------------------------------------------
// storePlanner records p as the current planner snapshot of peerID.
// The first time a peer is seen a cache entry is created, its insertion time
// is recorded and an eviction goroutine is started for plannerTTL. For a known
// peer only the snapshot is replaced, so the refresh-ownership state
// (Refreshing / RefreshOwner) of an in-flight request is left untouched.
// Subscribers watching this peer are notified after the cache lock is released.
func storePlanner(peerID string, p *planner.Planner) {
	plannerMu.Lock()
	cached := PlannerCache[peerID]
	if cached == nil {
		cached = &plannerEntry{}
		PlannerCache[peerID] = cached
		plannerAddedAt[peerID] = time.Now()
		go evictAfter(peerID, plannerTTL)
	}
	cached.Planner = p
	plannerMu.Unlock()

	notifyPlannerWatchers(peerID)
}
// evictAfter sleeps for ttl, then drops peerID from PlannerCache (and its
// insertion timestamp) and, if an entry was actually removed, emits
// PB_CLOSE_PLANNER so oc-discovery stops streaming for this peer.
// This is the only path that actually removes an entry from PlannerCache;
// session close (ReleaseRefreshOwnership) only resets ownership state.
func evictAfter(peerID string, ttl time.Duration) {
	time.Sleep(ttl)

	plannerMu.Lock()
	_, evicted := PlannerCache[peerID]
	if evicted {
		delete(PlannerCache, peerID)
		delete(plannerAddedAt, peerID)
	}
	plannerMu.Unlock()

	if !evicted {
		return
	}
	// Emit outside the lock.
	EmitNATS(peerID, tools.PropalgationMessage{Action: tools.PB_CLOSE_PLANNER})
}
// ---------------------------------------------------------------------------
// NATS emission
// ---------------------------------------------------------------------------
@@ -174,275 +37,48 @@ func EmitNATS(peerID string, message tools.PropalgationMessage) {
})
}
// executionConsidersPayload is the JSON body carried by Considers messages.
type executionConsidersPayload struct {
// ID identifies the object the message is about: a booking or purchase ID
// for BOOKING / PURCHASE_RESOURCE messages, the execution ID for
// WORKFLOW_EXECUTION messages.
ID string `json:"id"`
// ExecutionsID is copied from the execution's ExecutionsID field.
ExecutionsID string `json:"executions_id"`
// ExecutionID is the ID of the workflow execution concerned.
ExecutionID string `json:"execution_id"`
// PeerIDs lists the peers involved in the workflow (see GetWorkflowPeerIDs).
PeerIDs []string `json:"peer_ids"`
}
// emitConsiders publishes a PROPALGATION_EVENT with the Considers action for
// the stored resource id, whose datatype dt is BOOKING or PURCHASE_RESOURCE.
// The execution is loaded to obtain its ExecutionsID and the peers of its
// workflow; nothing is sent when the execution or the peer list cannot be
// resolved.
func emitConsiders(id string, executionID string, dt tools.DataType) {
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil)
	data := access.LoadOne(executionID)
	exec := data.ToWorkflowExecution()
	if exec == nil {
		return
	}
	peers, err := GetWorkflowPeerIDs(exec.WorkflowID, &tools.APIRequest{Admin: true})
	if err != nil {
		return
	}

	// Encoding errors are ignored here, as in the rest of this emit path.
	inner, _ := json.Marshal(&executionConsidersPayload{
		ID:           id,
		ExecutionsID: exec.ExecutionsID,
		ExecutionID:  executionID,
		PeerIDs:      peers,
	})
	envelope, _ := json.Marshal(tools.PropalgationMessage{
		DataType: int(dt),
		Action:   tools.PB_CONSIDERS,
		Payload:  inner,
	})
	tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
		FromApp:  "oc-scheduler",
		Datatype: dt,
		Method:   int(tools.PROPALGATION_EVENT),
		Payload:  envelope,
	})
}
// EmitConsidersExecution publishes a Considers / WORKFLOW_EXECUTION message on
// the PROPALGATION_EVENT NATS subject, addressed to the storage and compute
// peers of wf, once the execution has transitioned to SCHEDULED. Each
// receiving peer will use it to confirm (IsDraft=false) their local drafts.
//
// The call is best-effort: it returns silently when wf has no graph, when no
// peer can be resolved, or when encoding fails.
func EmitConsidersExecution(exec *workflow_execution.WorkflowExecution, wf *workflow.Workflow) {
	if wf == nil || wf.Graph == nil {
		return
	}
	peerIDs, err := GetWorkflowPeerIDs(wf.GetID(), &tools.APIRequest{Admin: true})
	if err != nil || len(peerIDs) == 0 {
		return
	}

	execID := exec.GetID()
	inner, err := json.Marshal(executionConsidersPayload{
		ID:           execID,
		ExecutionID:  execID,
		ExecutionsID: exec.ExecutionsID,
		PeerIDs:      peerIDs,
	})
	if err != nil {
		return
	}

	envelope, err := json.Marshal(tools.PropalgationMessage{
		DataType: int(tools.WORKFLOW_EXECUTION),
		Action:   tools.PB_CONSIDERS,
		Payload:  inner,
	})
	if err != nil {
		return
	}

	tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
		FromApp:  "oc-scheduler",
		Datatype: tools.WORKFLOW_EXECUTION,
		Method:   int(tools.PROPALGATION_EVENT),
		Payload:  envelope,
	})
}
// updateExecutionState marks one booking (dt==BOOKING) or purchase
// (dt==PURCHASE_RESOURCE) as considered on the execution named in the payload:
// BookingsState[id] or PurchasesState[id] is set to true. When no state is
// left false the execution moves to SCHEDULED. The execution is written back
// in every case.
// payload must be JSON-encoded {"id":"...", "execution_id":"..."}; it is
// ignored when either field is missing.
func updateExecutionState(payload []byte, dt tools.DataType) {
	var msg executionConsidersPayload
	if err := json.Unmarshal(payload, &msg); err != nil || msg.ID == "" || msg.ExecutionID == "" {
		return
	}

	adminReq := &tools.APIRequest{Admin: true}
	loaded, _, err := workflow_execution.NewAccessor(adminReq).LoadOne(msg.ExecutionID)
	if err != nil || loaded == nil {
		fmt.Printf("updateExecutionState: could not load execution %s: %v\n", msg.ExecutionID, err)
		return
	}
	exec := loaded.(*workflow_execution.WorkflowExecution)

	if dt == tools.BOOKING {
		if exec.BookingsState == nil {
			exec.BookingsState = map[string]bool{}
		}
		exec.BookingsState[msg.ID] = true
	} else if dt == tools.PURCHASE_RESOURCE {
		if exec.PurchasesState == nil {
			exec.PurchasesState = map[string]bool{}
		}
		exec.PurchasesState[msg.ID] = true
	}

	// Scheduled only once every booking and purchase has been considered.
	pending := false
	for _, considered := range exec.BookingsState {
		if !considered {
			pending = true
			break
		}
	}
	for _, considered := range exec.PurchasesState {
		if !considered {
			pending = true
			break
		}
	}
	if !pending {
		exec.State = enum.SCHEDULED
	}

	_, _, err = utils.GenericRawUpdateOne(exec, msg.ExecutionID, workflow_execution.NewAccessor(adminReq))
	if err != nil {
		fmt.Printf("updateExecutionState: could not update execution %s: %v\n", msg.ExecutionID, err)
	}
}
// confirmExecutionDrafts handles a Considers/WORKFLOW_EXECUTION message
// received from oc-discovery, i.e. the originating peer has confirmed the
// execution as SCHEDULED. It loads the execution named in the payload and
// confirms, each in its own goroutine, every booking and purchase listed in
// the execution's states (IsDraft=false).
func confirmExecutionDrafts(payload []byte) {
	var msg executionConsidersPayload
	if err := json.Unmarshal(payload, &msg); err != nil {
		fmt.Printf("confirmExecutionDrafts: could not parse payload: %v\n", err)
		return
	}

	access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.WORKFLOW_EXECUTION), nil)
	loaded := access.LoadOne(msg.ExecutionID)
	exec := loaded.ToWorkflowExecution()
	if exec == nil {
		return // unknown execution: nothing to confirm
	}

	for bookingID := range exec.BookingsState {
		go confirmResource(bookingID, tools.BOOKING)
	}
	for purchaseID := range exec.PurchasesState {
		go confirmResource(purchaseID, tools.PURCHASE_RESOURCE)
	}
}
// ---------------------------------------------------------------------------
// NATS listeners
// ---------------------------------------------------------------------------
// ListenNATS registers one handler per NATS method on a new NATS caller.
//
// Each method is registered exactly once: a map literal must not repeat a
// constant key, so the handler bodies live in the named functions below
// rather than being duplicated inline.
func ListenNATS() {
	tools.NewNATSCaller().ListenNats(map[tools.NATSMethod]func(tools.NATSResponse){
		// Planner snapshots: decoded and stored in the planner cache.
		tools.PLANNER_EXECUTION: handlePlannerExecution,
		// Propagation events coming from oc-discovery. PB_CONSIDERS on
		// BOOKING / PURCHASE_RESOURCE updates the execution state;
		// PB_CONSIDERS on WORKFLOW_EXECUTION confirms the local drafts.
		tools.PROPALGATION_EVENT: handlePropagationEvent,
		// Resource removal: workflow watchers are notified; bookings and
		// purchases are deleted after an origin check.
		tools.REMOVE_RESOURCE: handleRemoveResource,
		// Resource creation: workflows trigger a planner broadcast and a
		// watcher notification; bookings and purchases are stored/upserted.
		tools.CREATE_RESOURCE: handleCreateResource,
	})
}
// ---------------------------------------------------------------------------
// Confirm channels
// ---------------------------------------------------------------------------
// ListenConfirm opens a direct NATS connection and subscribes to the hardcoded
// "confirm_booking" and "confirm_purchase" subjects. It never returns unless
// the NATS URL is not configured.
//
// Once both listener goroutines have returned, the connection is closed and a
// new one is attempted after a one-minute pause. The pause matters: when the
// subscriptions fail right away, both listeners return immediately, and
// without it the loop would reconnect in a tight spin.
func ListenConfirm() {
	natsURL := config.GetConfig().NATSUrl
	if natsURL == "" {
		fmt.Println("ListenConfirm: NATS_SERVER not set, skipping confirm listeners")
		return
	}
	for {
		nc, err := nats.Connect(natsURL)
		if err != nil {
			fmt.Println("ListenConfirm: could not connect to NATS:", err)
			time.Sleep(time.Minute)
			continue
		}
		var wg sync.WaitGroup
		wg.Add(2)
		go listenConfirmChannel(nc, "confirm_booking", tools.BOOKING, &wg)
		go listenConfirmChannel(nc, "confirm_purchase", tools.PURCHASE_RESOURCE, &wg)
		wg.Wait()
		nc.Close()
		// Same back-off as a failed connect, so a persistent subscription
		// failure cannot turn into a busy reconnect loop.
		fmt.Println("ListenConfirm: confirm listeners stopped, reconnecting in 1 minute")
		time.Sleep(time.Minute)
	}
}
// ---------------------------------------------------------------------------
// Draft timeout
// ---------------------------------------------------------------------------
@@ -474,254 +110,9 @@ func draftTimeout(id string, dt tools.DataType) {
}
// ---------------------------------------------------------------------------
// Confirm channels
// Kubernetes namespace helper
// ---------------------------------------------------------------------------
// confirmResource sets IsDraft=false for the booking or purchase resource
// identified by id; dt selects which of the two accessors is used.
//
// For bookings it also sets State to SCHEDULED, creates the namespace named
// after the booking's ExecutionsID and, when the self peer can be resolved,
// refreshes the local planner in a goroutine. Every failure is printed and
// nothing is returned to the caller.
func confirmResource(id string, dt tools.DataType) {
	adminReq := &tools.APIRequest{Admin: true}
	switch dt {
	case tools.BOOKING:
		res, _, err := booking.NewAccessor(adminReq).LoadOne(id)
		if err != nil || res == nil {
			fmt.Printf("confirmResource: could not load booking %s: %v\n", id, err)
			return
		}
		bk, ok := res.(*booking.Booking)
		if !ok {
			// Guard the assertion: an unexpected concrete type must not panic.
			fmt.Printf("confirmResource: unexpected type %T for booking %s\n", res, id)
			return
		}
		bk.IsDraft = false
		bk.State = enum.SCHEDULED
		if _, _, err := utils.GenericRawUpdateOne(bk, id, booking.NewAccessor(adminReq)); err != nil {
			fmt.Printf("confirmResource: could not confirm booking %s: %v\n", id, err)
			return
		}
		// The booking is already confirmed at this point, so a namespace
		// failure is reported but does not abort the planner refresh.
		if err := createNamespace(bk.ExecutionsID); err != nil {
			fmt.Printf("confirmResource: could not create namespace %s: %v\n", bk.ExecutionsID, err)
		}
		if self, err := oclib.GetMySelf(); err == nil && self != nil {
			go refreshSelfPlanner(self.PeerID, adminReq)
		}
	case tools.PURCHASE_RESOURCE:
		res, _, err := purchase_resource.NewAccessor(adminReq).LoadOne(id)
		if err != nil || res == nil {
			fmt.Printf("confirmResource: could not load purchase %s: %v\n", id, err)
			return
		}
		pr, ok := res.(*purchase_resource.PurchaseResource)
		if !ok {
			fmt.Printf("confirmResource: unexpected type %T for purchase %s\n", res, id)
			return
		}
		pr.IsDraft = false
		if _, _, err := utils.GenericRawUpdateOne(pr, id, purchase_resource.NewAccessor(adminReq)); err != nil {
			fmt.Printf("confirmResource: could not confirm purchase %s: %v\n", id, err)
		}
	}
}
// listenConfirmChannel subscribes to subject on nc through a buffered channel
// (capacity 64) and calls confirmResource with the message body, interpreted
// as a plain string ID, for every message received. wg.Done is called when
// the function returns, which happens on subscription failure or when the
// message channel is closed.
// NOTE(review): it is not visible here whether the NATS client ever closes
// the channel passed to ChanSubscribe — confirm against the nats.go docs.
func listenConfirmChannel(nc *nats.Conn, subject string, dt tools.DataType, wg *sync.WaitGroup) {
	defer wg.Done()
	msgs := make(chan *nats.Msg, 64)
	subscription, subErr := nc.ChanSubscribe(subject, msgs)
	if subErr != nil {
		fmt.Printf("listenConfirmChannel: could not subscribe to %s: %v\n", subject, subErr)
		return
	}
	defer subscription.Unsubscribe()
	for {
		m, open := <-msgs
		if !open {
			return
		}
		confirmResource(string(m.Data), dt)
	}
}
// ListenConfirm opens a direct NATS connection and subscribes to the hardcoded
// "confirm_booking" and "confirm_purchase" subjects. It never returns unless
// the NATS URL is not configured.
//
// Once both listener goroutines have returned, the connection is closed and a
// new one is attempted after a one-minute pause. The pause matters: when the
// subscriptions fail right away, both listeners return immediately, and
// without it the loop would reconnect in a tight spin.
func ListenConfirm() {
	natsURL := config.GetConfig().NATSUrl
	if natsURL == "" {
		fmt.Println("ListenConfirm: NATS_SERVER not set, skipping confirm listeners")
		return
	}
	for {
		nc, err := nats.Connect(natsURL)
		if err != nil {
			fmt.Println("ListenConfirm: could not connect to NATS:", err)
			time.Sleep(time.Minute)
			continue
		}
		var wg sync.WaitGroup
		wg.Add(2)
		go listenConfirmChannel(nc, "confirm_booking", tools.BOOKING, &wg)
		go listenConfirmChannel(nc, "confirm_purchase", tools.PURCHASE_RESOURCE, &wg)
		wg.Wait()
		nc.Close()
		// Same back-off as a failed connect, so a persistent subscription
		// failure cannot turn into a busy reconnect loop.
		fmt.Println("ListenConfirm: confirm listeners stopped, reconnecting in 1 minute")
		time.Sleep(time.Minute)
	}
}
// ---------------------------------------------------------------------------
// Self-planner initialisation
// ---------------------------------------------------------------------------
// InitSelfPlanner bootstraps our own planner entry at startup.
// It polls oclib.GetMySelf every 15 seconds until a self peer is returned
// without error, then calls refreshSelfPlanner once with an admin request
// and returns. It blocks the calling goroutine for as long as the self peer
// is unavailable.
func InitSelfPlanner() {
	self, err := oclib.GetMySelf()
	for err != nil || self == nil {
		fmt.Println("InitSelfPlanner: self peer not found yet, retrying in 15s...")
		time.Sleep(15 * time.Second)
		self, err = oclib.GetMySelf()
	}
	refreshSelfPlanner(self.PeerID, &tools.APIRequest{Admin: true})
}
// ---------------------------------------------------------------------------
// Self-planner refresh
// ---------------------------------------------------------------------------
// refreshSelfPlanner regenerates the local planner with
// planner.GenerateShallow, stores it in the planner cache under peerID
// (which also notifies planner subscribers), and emits it as a PB_PLANNER
// propagation message whose payload is the planner plus a "peer_id" field.
//
// peerID is the cache key and the target passed to EmitNATS; request is
// forwarded to planner.GenerateShallow. Failures are printed and abort the
// refresh; nothing is returned.
func refreshSelfPlanner(peerID string, request *tools.APIRequest) {
	p, err := planner.GenerateShallow(request)
	if err != nil {
		fmt.Println("refreshSelfPlanner: could not generate planner:", err)
		return
	}
	// Update the local cache first so local watchers see the new snapshot
	// even if the broadcast below cannot be built.
	storePlanner(peerID, p)
	// NOTE(review): the planner is embedded; if planner.Planner defines its
	// own MarshalJSON the promoted method would drop peer_id — confirm.
	type plannerWithPeer struct {
		PeerID string `json:"peer_id"`
		*planner.Planner
	}
	plannerPayload, err := json.Marshal(plannerWithPeer{PeerID: peerID, Planner: p})
	if err != nil {
		// Previously swallowed silently; report it like the other failures.
		fmt.Println("refreshSelfPlanner: could not marshal planner:", err)
		return
	}
	EmitNATS(peerID, tools.PropalgationMessage{
		Action:  tools.PB_PLANNER,
		Payload: plannerPayload,
	})
}
// ---------------------------------------------------------------------------
// Planner broadcast
// ---------------------------------------------------------------------------
// RequestPlannerRefresh emits a PB_PLANNER request for every peer in peerIDs
// whose cache entry is not already marked Refreshing. A missing cache entry
// is created on the fly (with its eviction goroutine). For each request
// emitted, the entry is marked Refreshing and sessionID is recorded as its
// RefreshOwner. The returned slice lists the peers claimed by this call.
func RequestPlannerRefresh(peerIDs []string, sessionID string) []string {
	// claim reports whether this session became the refresh owner of peerID.
	claim := func(peerID string) bool {
		plannerMu.Lock()
		defer plannerMu.Unlock()
		cached := PlannerCache[peerID]
		if cached == nil {
			cached = &plannerEntry{}
			PlannerCache[peerID] = cached
			plannerAddedAt[peerID] = time.Now()
			go evictAfter(peerID, plannerTTL)
		}
		if cached.Refreshing {
			return false
		}
		cached.Refreshing = true
		cached.RefreshOwner = sessionID
		return true
	}

	var owned []string
	for _, peerID := range peerIDs {
		if !claim(peerID) {
			continue
		}
		owned = append(owned, peerID)
		body, _ := json.Marshal(map[string]any{"peer_id": peerID})
		EmitNATS(peerID, tools.PropalgationMessage{
			Action:  tools.PB_PLANNER,
			Payload: body,
		})
	}
	return owned
}
// ReleaseRefreshOwnership is called when a check session closes (clean or
// forced). For each peer in peerIDs whose cache entry has sessionID as its
// RefreshOwner, it clears Refreshing / RefreshOwner and emits
// PB_CLOSE_PLANNER with a {"peer_id": ...} payload.
//
// Peers that have no cache entry, or whose refresh is owned by another
// session, are skipped: emitting a close for them would act on a refresh
// this session never started. The planner snapshot is left in the cache.
func ReleaseRefreshOwnership(peerIDs []string, sessionID string) {
	for _, peerID := range peerIDs {
		plannerMu.Lock()
		entry := PlannerCache[peerID]
		released := entry != nil && entry.RefreshOwner == sessionID
		if released {
			entry.Refreshing = false
			entry.RefreshOwner = ""
		}
		plannerMu.Unlock()
		if !released {
			continue
		}
		payload, err := json.Marshal(map[string]any{"peer_id": peerID})
		if err != nil {
			continue
		}
		EmitNATS(peerID, tools.PropalgationMessage{
			Action:  tools.PB_CLOSE_PLANNER,
			Payload: payload,
		})
	}
}
// broadcastPlanner walks the storage and compute items of wf's graph and,
// for each distinct resource creator whose peer has no cached planner
// snapshot and no refresh in flight, emits a PB_PLANNER propagation carrying
// {"peer_id": ...}. It does nothing when wf has no graph.
//
// A creator is recorded as seen as soon as it has been examined, so the peer
// lookup runs at most once per creator even when nothing is emitted for it.
func broadcastPlanner(wf *workflow.Workflow) {
	if wf.Graph == nil {
		return
	}
	items := []graph.GraphItem{}
	items = append(items, wf.GetGraphItems(wf.Graph.IsStorage)...)
	items = append(items, wf.GetGraphItems(wf.Graph.IsCompute)...)
	seen := []string{}
	for _, item := range items {
		_, res := item.GetResource()
		if res == nil {
			continue
		}
		creatorID := res.GetCreatorID()
		if slices.Contains(seen, creatorID) {
			continue
		}
		// Mark before the lookup: the outcome for this creator does not
		// depend on which graph item referenced it.
		seen = append(seen, creatorID)
		data := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).LoadOne(creatorID)
		p := data.ToPeer()
		if p == nil {
			continue
		}
		plannerMu.RLock()
		cached := PlannerCache[p.PeerID]
		plannerMu.RUnlock()
		// Only request if no snapshot and no refresh already in flight.
		if cached != nil && (cached.Planner != nil || cached.Refreshing) {
			continue
		}
		payload, err := json.Marshal(map[string]any{"peer_id": p.PeerID})
		if err != nil {
			continue
		}
		EmitNATS(p.PeerID, tools.PropalgationMessage{
			Action:  tools.PB_PLANNER,
			Payload: payload,
		})
	}
}
func createNamespace(ns string) error {
/*
* This function is used to create a namespace.

View File

@@ -0,0 +1,274 @@
package infrastructure
import (
"encoding/json"
"fmt"
"sync"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/booking"
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/common/enum"
"cloud.o-forge.io/core/oc-lib/models/peer"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/models/workflow"
"cloud.o-forge.io/core/oc-lib/tools"
"github.com/nats-io/nats.go"
)
// handlePlannerExecution decodes a planner snapshot from resp.Payload and
// stores it in the planner cache under the payload's "peer_id" value.
//
// The payload is decoded twice: once generically to read "peer_id", once
// into a planner.Planner. Payloads that fail to decode, or that carry no
// peer_id, are dropped; formatting a missing value would otherwise cache the
// snapshot under the literal key "<nil>".
func handlePlannerExecution(resp tools.NATSResponse) {
	m := map[string]any{}
	p := planner.Planner{}
	if err := json.Unmarshal(resp.Payload, &m); err != nil {
		return
	}
	if err := json.Unmarshal(resp.Payload, &p); err != nil {
		return
	}
	rawID, ok := m["peer_id"]
	if !ok || rawID == nil {
		fmt.Println("handlePlannerExecution: payload has no peer_id, ignoring")
		return
	}
	peerID := fmt.Sprintf("%v", rawID)
	if peerID == "" {
		fmt.Println("handlePlannerExecution: payload has an empty peer_id, ignoring")
		return
	}
	storePlanner(peerID, &p)
}
// handlePropagationEvent processes propagation messages whose FromApp is
// "oc-discovery"; anything else is ignored. Only the PB_CONSIDERS action is
// handled: BOOKING and PURCHASE_RESOURCE payloads go to updateExecutionState,
// WORKFLOW_EXECUTION payloads go to confirmExecutionDrafts. Payloads that do
// not decode are dropped silently.
func handlePropagationEvent(resp tools.NATSResponse) {
	if resp.FromApp != "oc-discovery" {
		return
	}
	msg := tools.PropalgationMessage{}
	if json.Unmarshal(resp.Payload, &msg) != nil {
		return
	}
	if msg.Action != tools.PB_CONSIDERS {
		return
	}
	fmt.Println("PB_CONSIDERS")
	dt := tools.DataType(msg.DataType)
	switch dt {
	case tools.WORKFLOW_EXECUTION:
		confirmExecutionDrafts(msg.Payload)
	case tools.BOOKING, tools.PURCHASE_RESOURCE:
		fmt.Println("updateExecutionState", dt)
		updateExecutionState(msg.Payload, dt)
	}
}
// handleRemoveResource reacts to a REMOVE_RESOURCE message according to
// resp.Datatype:
//   - WORKFLOW: notifies the watchers of the decoded workflow's ID.
//   - BOOKING: deletes the stored booking, then refreshes the self planner
//     in a goroutine.
//   - PURCHASE_RESOURCE: deletes the stored purchase.
//
// Bookings and purchases are only deleted when the stored record carries the
// same SchedulerPeerID and ExecutionsID as the request. Undecodable payloads
// and unknown IDs are ignored; a failed delete is printed and, for bookings,
// skips the planner refresh.
func handleRemoveResource(resp tools.NATSResponse) {
	switch resp.Datatype {
	case tools.WORKFLOW:
		wf := workflow.Workflow{}
		if err := json.Unmarshal(resp.Payload, &wf); err != nil {
			return
		}
		notifyWorkflowWatchers(wf.GetID())
	case tools.BOOKING:
		var p removeResourcePayload
		if err := json.Unmarshal(resp.Payload, &p); err != nil {
			return
		}
		self, err := oclib.GetMySelf()
		if err != nil || self == nil {
			return
		}
		adminReq := &tools.APIRequest{Admin: true}
		res, _, loadErr := booking.NewAccessor(adminReq).LoadOne(p.ID)
		if loadErr != nil || res == nil {
			return
		}
		existing, ok := res.(*booking.Booking)
		if !ok {
			// Guard the assertion: an unexpected concrete type must not panic.
			return
		}
		if existing.SchedulerPeerID != p.SchedulerPeerID || existing.ExecutionsID != p.ExecutionsID {
			fmt.Println("ListenNATS REMOVE_RESOURCE booking: auth mismatch, ignoring", p.ID)
			return
		}
		if _, _, err := booking.NewAccessor(adminReq).DeleteOne(p.ID); err != nil {
			// Nothing changed in storage, so the planner needs no refresh.
			fmt.Println("ListenNATS REMOVE_RESOURCE booking: delete failed:", err)
			return
		}
		go refreshSelfPlanner(self.PeerID, adminReq)
	case tools.PURCHASE_RESOURCE:
		var p removeResourcePayload
		if err := json.Unmarshal(resp.Payload, &p); err != nil {
			return
		}
		adminReq := &tools.APIRequest{Admin: true}
		res, _, loadErr := purchase_resource.NewAccessor(adminReq).LoadOne(p.ID)
		if loadErr != nil || res == nil {
			return
		}
		existing, ok := res.(*purchase_resource.PurchaseResource)
		if !ok {
			return
		}
		if existing.SchedulerPeerID != p.SchedulerPeerID || existing.ExecutionsID != p.ExecutionsID {
			fmt.Println("ListenNATS REMOVE_RESOURCE purchase: auth mismatch, ignoring", p.ID)
			return
		}
		if _, _, err := purchase_resource.NewAccessor(adminReq).DeleteOne(p.ID); err != nil {
			fmt.Println("ListenNATS REMOVE_RESOURCE purchase: delete failed:", err)
		}
	}
}
// handleCreateBooking stores or upserts an incoming booking.
//
// When a booking with the same ID already exists it is updated, provided
// that SchedulerPeerID and ExecutionsID match the stored record, the update
// would not turn a non-draft back into a draft, and — for a confirmation
// (IsDraft=false) — the stored start date is not in the past (in which case
// the stored booking is deleted instead). After a successful update the self
// planner is refreshed and, for a confirmation, applyConsidersLocal is run.
//
// Otherwise the booking is treated as new: it is discarded when its start
// date is in the past or when checkInstance rejects it against our cached
// planner; else it is stored as a draft, the self planner is refreshed and
// draftTimeout is scheduled 10 minutes later.
func handleCreateBooking(bk *booking.Booking, self *peer.Peer, adminReq *tools.APIRequest) {
	found, _, lookupErr := booking.NewAccessor(adminReq).LoadOne(bk.GetID())
	if lookupErr == nil && found != nil {
		known := found.(*booking.Booking)
		switch {
		case known.SchedulerPeerID != bk.SchedulerPeerID || known.ExecutionsID != bk.ExecutionsID:
			fmt.Println("ListenNATS CREATE_RESOURCE booking upsert: auth mismatch, ignoring", bk.GetID())
			return
		case !known.IsDrafted() && bk.IsDraft:
			// Already confirmed: a draft must not overwrite it.
			return
		case !bk.IsDraft && !known.ExpectedStartDate.IsZero() && known.ExpectedStartDate.Before(time.Now()):
			// Confirmation arriving after the stored start date.
			fmt.Println("ListenNATS CREATE_RESOURCE booking: expired, deleting", bk.GetID())
			booking.NewAccessor(adminReq).DeleteOne(bk.GetID())
			return
		}
		_, _, updateErr := utils.GenericRawUpdateOne(bk, bk.GetID(), booking.NewAccessor(adminReq))
		if updateErr != nil {
			fmt.Println("ListenNATS CREATE_RESOURCE booking update failed:", updateErr)
			return
		}
		go refreshSelfPlanner(self.PeerID, adminReq)
		if !bk.IsDraft {
			go applyConsidersLocal(bk.GetID(), tools.BOOKING)
		}
		return
	}

	// No stored booking with this ID: creation path.
	if !bk.ExpectedStartDate.IsZero() && bk.ExpectedStartDate.Before(time.Now()) {
		fmt.Println("ListenNATS: booking start date is in the past, discarding")
		return
	}
	plannerMu.RLock()
	own := PlannerCache[self.PeerID]
	plannerMu.RUnlock()
	if own != nil && own.Planner != nil {
		slotFree := checkInstance(own.Planner, bk.ResourceID, bk.InstanceID, bk.ExpectedStartDate, bk.ExpectedEndDate)
		if !slotFree {
			fmt.Println("ListenNATS: booking conflicts with local planner, discarding")
			return
		}
	}
	bk.IsDraft = true
	created, _, storeErr := booking.NewAccessor(adminReq).StoreOne(bk)
	if storeErr != nil {
		fmt.Println("ListenNATS: could not store booking:", storeErr)
		return
	}
	draftID := created.GetID()
	go refreshSelfPlanner(self.PeerID, adminReq)
	time.AfterFunc(10*time.Minute, func() { draftTimeout(draftID, tools.BOOKING) })
}
// handleCreatePurchase stores or upserts an incoming purchase whose
// DestPeerID equals our own peer ID; purchases for other peers are ignored.
//
// An unknown ID is stored as a draft and draftTimeout is scheduled 10
// minutes later. A known ID is updated when SchedulerPeerID and ExecutionsID
// match the stored record and the update would not turn a non-draft back
// into a draft; a successful non-draft update runs applyConsidersLocal.
func handleCreatePurchase(pr *purchase_resource.PurchaseResource, self *peer.Peer, adminReq *tools.APIRequest) {
	if pr.DestPeerID != self.GetID() {
		return
	}
	found, _, lookupErr := purchase_resource.NewAccessor(adminReq).LoadOne(pr.GetID())
	if lookupErr != nil || found == nil {
		// Creation path: no stored purchase with this ID.
		pr.IsDraft = true
		created, _, storeErr := purchase_resource.NewAccessor(adminReq).StoreOne(pr)
		if storeErr != nil {
			fmt.Println("ListenNATS: could not store purchase:", storeErr)
			return
		}
		draftID := created.GetID()
		time.AfterFunc(10*time.Minute, func() { draftTimeout(draftID, tools.PURCHASE_RESOURCE) })
		return
	}

	// Upsert path.
	known := found.(*purchase_resource.PurchaseResource)
	if known.SchedulerPeerID != pr.SchedulerPeerID || known.ExecutionsID != pr.ExecutionsID {
		fmt.Println("ListenNATS CREATE_RESOURCE purchase upsert: auth mismatch, ignoring", pr.GetID())
		return
	}
	if !known.IsDrafted() && pr.IsDraft {
		// Already confirmed: a draft must not overwrite it.
		return
	}
	_, _, updateErr := utils.GenericRawUpdateOne(pr, pr.GetID(), purchase_resource.NewAccessor(adminReq))
	if updateErr != nil {
		fmt.Println("ListenNATS CREATE_RESOURCE purchase update failed:", updateErr)
		return
	}
	if !pr.IsDraft {
		go applyConsidersLocal(pr.GetID(), tools.PURCHASE_RESOURCE)
	}
}
// handleCreateResource reacts to a CREATE_RESOURCE message according to
// resp.Datatype:
//   - WORKFLOW: broadcasts planner requests for the workflow's peers and
//     notifies the workflow's watchers.
//   - BOOKING: hands the decoded booking to handleCreateBooking.
//   - PURCHASE_RESOURCE: hands the decoded purchase to handleCreatePurchase.
//
// Undecodable payloads are ignored. Bookings and purchases are also ignored
// when the self peer cannot be resolved, because both handlers dereference
// it.
func handleCreateResource(resp tools.NATSResponse) {
	switch resp.Datatype {
	case tools.WORKFLOW:
		wf := workflow.Workflow{}
		if err := json.Unmarshal(resp.Payload, &wf); err != nil {
			return
		}
		broadcastPlanner(&wf)
		notifyWorkflowWatchers(wf.GetID())
	case tools.BOOKING:
		var bk booking.Booking
		if err := json.Unmarshal(resp.Payload, &bk); err != nil {
			return
		}
		self, err := oclib.GetMySelf()
		if err != nil || self == nil {
			// handleCreateBooking reads self.PeerID; a nil self would panic.
			return
		}
		// NOTE(review): the original also had a (commented-out) filter on
		// bk.DestPeerID != self.GetID(); handleCreatePurchase applies the
		// equivalent filter. Confirm whether bookings should be filtered too.
		handleCreateBooking(&bk, self, &tools.APIRequest{Admin: true})
	case tools.PURCHASE_RESOURCE:
		var pr purchase_resource.PurchaseResource
		if err := json.Unmarshal(resp.Payload, &pr); err != nil {
			return
		}
		self, err := oclib.GetMySelf()
		if err != nil || self == nil {
			return
		}
		handleCreatePurchase(&pr, self, &tools.APIRequest{Admin: true})
	}
}
// confirmResource sets IsDraft=false for the booking or purchase resource
// identified by id; dt selects which of the two accessors is used.
//
// For bookings it also sets State to SCHEDULED, creates the namespace named
// after the booking's ExecutionsID and, when the self peer can be resolved,
// refreshes the local planner in a goroutine. Every failure is printed and
// nothing is returned to the caller.
func confirmResource(id string, dt tools.DataType) {
	adminReq := &tools.APIRequest{Admin: true}
	switch dt {
	case tools.BOOKING:
		res, _, err := booking.NewAccessor(adminReq).LoadOne(id)
		if err != nil || res == nil {
			fmt.Printf("confirmResource: could not load booking %s: %v\n", id, err)
			return
		}
		bk, ok := res.(*booking.Booking)
		if !ok {
			// Guard the assertion: an unexpected concrete type must not panic.
			fmt.Printf("confirmResource: unexpected type %T for booking %s\n", res, id)
			return
		}
		bk.IsDraft = false
		bk.State = enum.SCHEDULED
		if _, _, err := utils.GenericRawUpdateOne(bk, id, booking.NewAccessor(adminReq)); err != nil {
			fmt.Printf("confirmResource: could not confirm booking %s: %v\n", id, err)
			return
		}
		// The booking is already confirmed at this point, so a namespace
		// failure is reported but does not abort the planner refresh.
		if err := createNamespace(bk.ExecutionsID); err != nil {
			fmt.Printf("confirmResource: could not create namespace %s: %v\n", bk.ExecutionsID, err)
		}
		if self, err := oclib.GetMySelf(); err == nil && self != nil {
			go refreshSelfPlanner(self.PeerID, adminReq)
		}
	case tools.PURCHASE_RESOURCE:
		res, _, err := purchase_resource.NewAccessor(adminReq).LoadOne(id)
		if err != nil || res == nil {
			fmt.Printf("confirmResource: could not load purchase %s: %v\n", id, err)
			return
		}
		pr, ok := res.(*purchase_resource.PurchaseResource)
		if !ok {
			fmt.Printf("confirmResource: unexpected type %T for purchase %s\n", res, id)
			return
		}
		pr.IsDraft = false
		if _, _, err := utils.GenericRawUpdateOne(pr, id, purchase_resource.NewAccessor(adminReq)); err != nil {
			fmt.Printf("confirmResource: could not confirm purchase %s: %v\n", id, err)
		}
	}
}
// listenConfirmChannel subscribes to subject on nc through a buffered channel
// (capacity 64) and calls confirmResource with the message body, interpreted
// as a plain string ID, for every message received. wg.Done is called when
// the function returns, which happens on subscription failure or when the
// message channel is closed.
// NOTE(review): it is not visible here whether the NATS client ever closes
// the channel passed to ChanSubscribe — confirm against the nats.go docs.
func listenConfirmChannel(nc *nats.Conn, subject string, dt tools.DataType, wg *sync.WaitGroup) {
	defer wg.Done()
	msgs := make(chan *nats.Msg, 64)
	subscription, subErr := nc.ChanSubscribe(subject, msgs)
	if subErr != nil {
		fmt.Printf("listenConfirmChannel: could not subscribe to %s: %v\n", subject, subErr)
		return
	}
	defer subscription.Unsubscribe()
	for {
		m, open := <-msgs
		if !open {
			return
		}
		confirmResource(string(m.Data), dt)
	}
}

353
infrastructure/planner.go Normal file
View File

@@ -0,0 +1,353 @@
package infrastructure
import (
"encoding/json"
"fmt"
"slices"
"sync"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/workflow"
"cloud.o-forge.io/core/oc-lib/models/workflow/graph"
"cloud.o-forge.io/core/oc-lib/tools"
)
// plannerTTL is the delay handed to evictAfter when a planner cache entry is
// first created; the entry is removed from the cache once it has elapsed.
const plannerTTL = 24 * time.Hour
// ---------------------------------------------------------------------------
// Planner cache — protected by plannerMu
// ---------------------------------------------------------------------------
// plannerEntry wraps a planner snapshot with refresh-ownership tracking.
// At most one check session may be the "refresh owner" of a given peer's
// planner at a time: it emits PB_PLANNER to request a fresh snapshot from
// oc-discovery and, on close (clean or forced), emits PB_CLOSE_PLANNER to
// release the stream. Any subsequent session that needs the same peer's
// planner will see Refreshing=true and skip the duplicate request.
// Entries are shared through PlannerCache and must only be read or written
// while holding plannerMu.
type plannerEntry struct {
	Planner *planner.Planner // latest snapshot written by storePlanner; nil until one is stored
	Refreshing bool // set by RequestPlannerRefresh, cleared by ReleaseRefreshOwnership
	RefreshOwner string // identifier of the session that claimed the current refresh; "" when unowned
}
var (
	// plannerMu guards PlannerCache and plannerAddedAt.
	plannerMu sync.RWMutex
	// PlannerCache maps a peer ID to its cached planner entry.
	PlannerCache = make(map[string]*plannerEntry)
	// plannerAddedAt maps a peer ID to the time its cache entry was created.
	plannerAddedAt = make(map[string]time.Time)
)
// ---------------------------------------------------------------------------
// Subscriber registries — one keyed by peerID, one by workflowID
// ---------------------------------------------------------------------------
var (
	// subsMu guards both subscriber registries below.
	subsMu sync.RWMutex
	// plannerSubs maps a peer ID to the channels that receive that peer ID
	// whenever its planner is updated.
	plannerSubs = make(map[string][]chan string)
	// workflowSubs maps a workflow ID to the channels signalled when that
	// workflow changes.
	workflowSubs = make(map[string][]chan struct{})
)
// subscribePlanners registers interest in planner changes for the given peer
// IDs. The returned channel (buffer of 1) receives the peer ID each time one
// of those planners is updated; sends are non-blocking, so a notification is
// dropped while the buffer is full. Call cancel to unregister; calling it
// more than once is harmless.
//
// cancel never edits a registered slice in place: notifyPlannerWatchers
// iterates over the slice after releasing subsMu, so removal builds a fresh
// slice instead of shifting elements inside the shared backing array. Peers
// left without subscribers are removed from the registry.
func subscribePlanners(peerIDs []string) (<-chan string, func()) {
	ch := make(chan string, 1)
	subsMu.Lock()
	for _, k := range peerIDs {
		plannerSubs[k] = append(plannerSubs[k], ch)
	}
	subsMu.Unlock()
	cancel := func() {
		subsMu.Lock()
		defer subsMu.Unlock()
		for _, k := range peerIDs {
			subs := plannerSubs[k]
			for i, s := range subs {
				if s != ch {
					continue
				}
				remaining := make([]chan string, 0, len(subs)-1)
				remaining = append(remaining, subs[:i]...)
				remaining = append(remaining, subs[i+1:]...)
				if len(remaining) == 0 {
					delete(plannerSubs, k)
				} else {
					plannerSubs[k] = remaining
				}
				break
			}
		}
	}
	return ch, cancel
}
// SubscribePlannerUpdates is the exported entry point to subscribePlanners:
// it returns a channel that receives a peer ID whenever the planner of one of
// the given peers is updated, together with the cancel function that removes
// the subscription.
func SubscribePlannerUpdates(peerIDs []string) (<-chan string, func()) {
	updates, cancel := subscribePlanners(peerIDs)
	return updates, cancel
}
// SubscribeWorkflowUpdates subscribes to change notifications for the
// workflow identified by wfID. The returned channel is signalled by
// notifyWorkflowWatchers; the returned function removes the subscription.
func SubscribeWorkflowUpdates(wfID string) (<-chan struct{}, func()) {
	return subscribe(&subsMu, workflowSubs, []string{wfID})
}
// subscribe is the generic helper used by the workflow registry. It creates
// a signal channel (buffer of 1), appends it to registry[k] for every key in
// keys while holding mu, and returns the channel together with a cancel
// function that removes it again. Calling cancel more than once is harmless.
//
// cancel never edits a registered slice in place: notify iterates over the
// slice after releasing mu, so removal builds a fresh slice instead of
// shifting elements inside the shared backing array. Keys left without
// subscribers are deleted from the registry.
func subscribe(mu *sync.RWMutex, registry map[string][]chan struct{}, keys []string) (<-chan struct{}, func()) {
	ch := make(chan struct{}, 1)
	mu.Lock()
	for _, k := range keys {
		registry[k] = append(registry[k], ch)
	}
	mu.Unlock()
	cancel := func() {
		mu.Lock()
		defer mu.Unlock()
		for _, k := range keys {
			subs := registry[k]
			for i, s := range subs {
				if s != ch {
					continue
				}
				remaining := make([]chan struct{}, 0, len(subs)-1)
				remaining = append(remaining, subs[:i]...)
				remaining = append(remaining, subs[i+1:]...)
				if len(remaining) == 0 {
					delete(registry, k)
				} else {
					registry[k] = remaining
				}
				break
			}
		}
	}
	return ch, cancel
}
// notifyPlannerWatchers sends peerID to every channel subscribed to that
// peer. Sends are non-blocking: a subscriber whose buffer is full is skipped.
//
// The subscriber list is copied while subsMu is held, so the sends can run
// without the lock and without reading a slice that a concurrent cancel may
// be rewriting.
func notifyPlannerWatchers(peerID string) {
	subsMu.RLock()
	subs := slices.Clone(plannerSubs[peerID])
	subsMu.RUnlock()
	for _, ch := range subs {
		select {
		case ch <- peerID:
		default:
		}
	}
}
// notifyWorkflowWatchers signals every channel registered in workflowSubs for
// wfID, delegating to notify with the shared subsMu lock.
func notifyWorkflowWatchers(wfID string) {
	notify(&subsMu, workflowSubs, wfID)
}
// notify signals every channel registered under key in registry. Sends are
// non-blocking: a subscriber whose buffer is full is skipped. An unknown key
// is a no-op.
//
// The subscriber list is copied while mu is held, so the sends can run
// without the lock and without reading a slice that a concurrent cancel may
// be rewriting.
func notify(mu *sync.RWMutex, registry map[string][]chan struct{}, key string) {
	mu.RLock()
	subs := slices.Clone(registry[key])
	mu.RUnlock()
	for _, ch := range subs {
		select {
		case ch <- struct{}{}:
		default:
		}
	}
}
// ---------------------------------------------------------------------------
// Cache helpers
// ---------------------------------------------------------------------------
// storePlanner records p as the current planner snapshot of peerID.
// A missing cache entry is created, time-stamped in plannerAddedAt and given
// an eviction goroutine with plannerTTL; an existing entry only has its
// Planner field replaced, leaving Refreshing and RefreshOwner untouched.
// Planner subscribers of peerID are notified after the cache lock is
// released.
func storePlanner(peerID string, p *planner.Planner) {
	func() {
		plannerMu.Lock()
		defer plannerMu.Unlock()
		cached := PlannerCache[peerID]
		if cached == nil {
			cached = new(plannerEntry)
			PlannerCache[peerID] = cached
			plannerAddedAt[peerID] = time.Now()
			go evictAfter(peerID, plannerTTL)
		}
		cached.Planner = p
	}()
	notifyPlannerWatchers(peerID)
}
// evictAfter sleeps for ttl, then removes peerID from PlannerCache and
// plannerAddedAt. When an entry was actually present, it emits a
// PB_CLOSE_PLANNER propagation (without payload) for that peer once the
// cache lock has been released; otherwise it does nothing further.
func evictAfter(peerID string, ttl time.Duration) {
	time.Sleep(ttl)
	plannerMu.Lock()
	if _, cached := PlannerCache[peerID]; !cached {
		plannerMu.Unlock()
		return
	}
	delete(PlannerCache, peerID)
	delete(plannerAddedAt, peerID)
	plannerMu.Unlock()
	closeMsg := tools.PropalgationMessage{Action: tools.PB_CLOSE_PLANNER}
	EmitNATS(peerID, closeMsg)
}
// ---------------------------------------------------------------------------
// Planner refresh / broadcast
// ---------------------------------------------------------------------------
// RequestPlannerRefresh asks oc-discovery for a fresh planner snapshot for
// each peer in peerIDs. Only the first session to request a given peer becomes
// its "refresh owner": subsequent sessions see Refreshing=true and skip the
// duplicate PB_PLANNER emission. Returns the subset of peerIDs for which this
// session claimed ownership (needed to release on close).
func RequestPlannerRefresh(peerIDs []string, executionsID string) []string {
var owned []string
for _, peerID := range peerIDs {
plannerMu.Lock()
entry := PlannerCache[peerID]
if entry == nil {
entry = &plannerEntry{}
PlannerCache[peerID] = entry
plannerAddedAt[peerID] = time.Now()
go evictAfter(peerID, plannerTTL)
}
shouldRequest := !entry.Refreshing
if shouldRequest {
entry.Refreshing = true
entry.RefreshOwner = executionsID
}
plannerMu.Unlock()
if shouldRequest {
owned = append(owned, peerID)
if p, err := oclib.GetMySelf(); err == nil && p != nil && p.PeerID == peerID {
// Self peer: generate and cache the planner directly without
// going through NATS / oc-discovery.
go refreshSelfPlanner(peerID, &tools.APIRequest{Admin: true})
} else {
payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
fmt.Println("PB_PLANNER", peerID)
EmitNATS(peerID, tools.PropalgationMessage{
Action: tools.PB_PLANNER,
Payload: payload,
})
}
}
}
return owned
}
// ReleaseRefreshOwnership is called when a check session closes (clean or
// forced). For each peer in peerIDs whose cache entry has executionsID as its
// RefreshOwner, it clears Refreshing / RefreshOwner and emits
// PB_CLOSE_PLANNER with a {"peer_id": ...} payload.
//
// Peers that have no cache entry, or whose refresh is owned by another
// session, are skipped: emitting a close for them would act on a refresh
// this session never started. The planner snapshot is left in the cache.
func ReleaseRefreshOwnership(peerIDs []string, executionsID string) {
	for _, peerID := range peerIDs {
		plannerMu.Lock()
		entry := PlannerCache[peerID]
		released := entry != nil && entry.RefreshOwner == executionsID
		if released {
			entry.Refreshing = false
			entry.RefreshOwner = ""
		}
		plannerMu.Unlock()
		if !released {
			continue
		}
		payload, err := json.Marshal(map[string]any{"peer_id": peerID})
		if err != nil {
			continue
		}
		EmitNATS(peerID, tools.PropalgationMessage{
			Action:  tools.PB_CLOSE_PLANNER,
			Payload: payload,
		})
	}
}
// broadcastPlanner walks the storage and compute items of wf's graph and,
// for each distinct resource creator whose peer has no cached planner
// snapshot and no refresh in flight, emits a PB_PLANNER propagation carrying
// {"peer_id": ...}. It does nothing when wf has no graph.
//
// A creator is recorded as seen as soon as it has been examined, so the peer
// lookup runs at most once per creator even when nothing is emitted for it.
func broadcastPlanner(wf *workflow.Workflow) {
	if wf.Graph == nil {
		return
	}
	items := []graph.GraphItem{}
	items = append(items, wf.GetGraphItems(wf.Graph.IsStorage)...)
	items = append(items, wf.GetGraphItems(wf.Graph.IsCompute)...)
	seen := []string{}
	for _, item := range items {
		_, res := item.GetResource()
		if res == nil {
			continue
		}
		creatorID := res.GetCreatorID()
		if slices.Contains(seen, creatorID) {
			continue
		}
		// Mark before the lookup: the outcome for this creator does not
		// depend on which graph item referenced it.
		seen = append(seen, creatorID)
		data := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).LoadOne(creatorID)
		p := data.ToPeer()
		if p == nil {
			continue
		}
		plannerMu.RLock()
		cached := PlannerCache[p.PeerID]
		plannerMu.RUnlock()
		// Only request if no snapshot and no refresh already in flight.
		if cached != nil && (cached.Planner != nil || cached.Refreshing) {
			continue
		}
		payload, err := json.Marshal(map[string]any{"peer_id": p.PeerID})
		if err != nil {
			continue
		}
		EmitNATS(p.PeerID, tools.PropalgationMessage{
			Action:  tools.PB_PLANNER,
			Payload: payload,
		})
	}
}
// ---------------------------------------------------------------------------
// Self-planner initialisation
// ---------------------------------------------------------------------------
// InitSelfPlanner bootstraps our own planner entry at startup.
// It polls oclib.GetMySelf every 15 seconds until a self peer is returned
// without error, then calls refreshSelfPlanner once with an admin request
// and returns. It blocks the calling goroutine for as long as the self peer
// is unavailable.
func InitSelfPlanner() {
	self, err := oclib.GetMySelf()
	for err != nil || self == nil {
		fmt.Println("InitSelfPlanner: self peer not found yet, retrying in 15s...")
		time.Sleep(15 * time.Second)
		self, err = oclib.GetMySelf()
	}
	refreshSelfPlanner(self.PeerID, &tools.APIRequest{Admin: true})
}
// ---------------------------------------------------------------------------
// Self-planner refresh
// ---------------------------------------------------------------------------
// refreshSelfPlanner regenerates the local planner through
// planner.GenerateShallow, stores it in the planner cache under peerID
// (via storePlanner) and broadcasts it with a PB_PLANNER propagation message
// so that listeners (including oc-discovery) are kept in sync.
//
// It should be called whenever a booking for our own peer is created, whether
// by direct DB insertion (self-peer routing) or upon receiving a CREATE_RESOURCE
// BOOKING message from oc-discovery.
//
// Failures are logged and abort the refresh; nothing is returned to the caller.
func refreshSelfPlanner(peerID string, request *tools.APIRequest) {
	p, err := planner.GenerateShallow(request)
	if err != nil {
		fmt.Println("refreshSelfPlanner: could not generate planner:", err)
		return
	}
	// Update the local cache and notify any waiting CheckStream goroutines.
	storePlanner(peerID, p)
	// Broadcast the updated planner so remote peers (and oc-discovery) can
	// refresh their view of our availability. The planner fields are
	// flattened next to "peer_id" through struct embedding.
	type plannerWithPeer struct {
		PeerID string `json:"peer_id"`
		*planner.Planner
	}
	plannerPayload, err := json.Marshal(plannerWithPeer{PeerID: peerID, Planner: p})
	if err != nil {
		// The cache is already up to date; only the broadcast is skipped.
		fmt.Println("refreshSelfPlanner: could not serialize planner:", err)
		return
	}
	EmitNATS(peerID, tools.PropalgationMessage{
		Action:  tools.PB_PLANNER,
		Payload: plannerPayload,
	})
}

View File

@@ -4,18 +4,17 @@ import (
"encoding/json"
"errors"
"fmt"
"oc-scheduler/infrastructure/scheduling"
"strings"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/bill"
"cloud.o-forge.io/core/oc-lib/models/booking"
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/common/enum"
"cloud.o-forge.io/core/oc-lib/models/common/pricing"
"cloud.o-forge.io/core/oc-lib/models/order"
"cloud.o-forge.io/core/oc-lib/models/peer"
"cloud.o-forge.io/core/oc-lib/models/resources"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/models/workflow"
@@ -48,6 +47,9 @@ type WorkflowSchedule struct {
SelectedStrategies workflow.ConfigItem `json:"selected_strategies"`
SelectedBillingStrategy pricing.BillingStrategy `json:"selected_billing_strategy"`
// Confirm, when true, triggers Schedule() to confirm the drafts held by this session.
Confirm bool `json:"confirm,omitempty"`
}
// TODO PREEMPTION !
@@ -67,7 +69,7 @@ ne pourra se lancé que SI il n'existe pas d'exécution se lançant durant la p
func NewScheduler(mode int, start string, end string, durationInS float64, cron string) *WorkflowSchedule {
ws := &WorkflowSchedule{
UUID: uuid.New().String(),
Start: time.Now(),
Start: time.Now().Add(asapBuffer),
BookingMode: booking.BookingMode(mode),
DurationS: durationInS,
Cron: cron,
@@ -84,21 +86,18 @@ func NewScheduler(mode int, start string, end string, durationInS float64, cron
return ws
}
func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest) (bool, *workflow.Workflow, []*workflow_execution.WorkflowExecution, []*purchase_resource.PurchaseResource, []*booking.Booking, error) {
if request.Caller == nil && request.Caller.URLS == nil && request.Caller.URLS[tools.BOOKING] == nil || request.Caller.URLS[tools.BOOKING][tools.GET] == "" {
return false, nil, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, errors.New("no caller defined")
}
func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest) (bool, *workflow.Workflow, []*workflow_execution.WorkflowExecution, []scheduling.SchedulerObject, []scheduling.SchedulerObject, error) {
access := workflow.NewAccessor(request)
res, code, err := access.LoadOne(wfID)
if code != 200 {
return false, nil, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, errors.New("could not load the workflow with id: " + err.Error())
return false, nil, []*workflow_execution.WorkflowExecution{}, []scheduling.SchedulerObject{}, []scheduling.SchedulerObject{}, errors.New("could not load the workflow with id: " + err.Error())
}
wf := res.(*workflow.Workflow)
isPreemptible, longest, priceds, wf, err := wf.Planify(ws.Start, ws.End,
ws.SelectedInstances, ws.SelectedPartnerships, ws.SelectedBuyings, ws.SelectedStrategies,
int(ws.BookingMode), request)
if err != nil {
return false, wf, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, err
return false, wf, []*workflow_execution.WorkflowExecution{}, []scheduling.SchedulerObject{}, []scheduling.SchedulerObject{}, err
}
ws.DurationS = longest
ws.Message = "We estimate that the workflow will start at " + ws.Start.String() + " and last " + fmt.Sprintf("%v", ws.DurationS) + " seconds."
@@ -107,101 +106,94 @@ func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest
}
execs, err := ws.GetExecutions(wf, isPreemptible)
if err != nil {
return false, wf, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, err
return false, wf, []*workflow_execution.WorkflowExecution{}, []scheduling.SchedulerObject{}, []scheduling.SchedulerObject{}, err
}
purchased := []*purchase_resource.PurchaseResource{}
bookings := []*booking.Booking{}
purchased := []scheduling.SchedulerObject{}
bookings := []scheduling.SchedulerObject{}
for _, exec := range execs {
purchased = append(purchased, exec.Buy(ws.SelectedBillingStrategy, ws.UUID, wfID, priceds)...)
bookings = append(bookings, exec.Book(ws.UUID, wfID, priceds)...)
for _, obj := range exec.Buy(ws.SelectedBillingStrategy, ws.UUID, wfID, priceds) {
purchased = append(purchased, scheduling.ToSchedulerObject(tools.PURCHASE_RESOURCE, obj))
}
for _, obj := range exec.Book(ws.UUID, wfID, priceds) {
bookings = append(bookings, scheduling.ToSchedulerObject(tools.BOOKING, obj))
}
}
return true, wf, execs, purchased, bookings, nil
}
func (ws *WorkflowSchedule) GenerateOrder(purchases []*purchase_resource.PurchaseResource, bookings []*booking.Booking, request *tools.APIRequest) error {
// GenerateOrder creates a draft order (+ draft bill) for the given purchases and bookings.
// Returns the created order ID and any error.
func (ws *WorkflowSchedule) GenerateOrder(purchases []scheduling.SchedulerObject, bookings []scheduling.SchedulerObject, executionsID string, request *tools.APIRequest) (string, error) {
newOrder := &order.Order{
AbstractObject: utils.AbstractObject{
Name: "order_" + request.PeerID + "_" + time.Now().UTC().Format("2006-01-02T15:04:05"),
IsDraft: true,
},
ExecutionsID: ws.UUID,
Purchases: purchases,
Bookings: bookings,
ExecutionsID: executionsID,
Purchases: []*purchase_resource.PurchaseResource{},
Bookings: []*booking.Booking{},
Status: enum.PENDING,
}
if res, _, err := order.NewAccessor(request).StoreOne(newOrder); err == nil {
for _, purch := range purchases {
newOrder.Purchases = append(
newOrder.Purchases, scheduling.FromSchedulerObject(tools.PURCHASE_RESOURCE, purch).(*purchase_resource.PurchaseResource))
}
for _, b := range bookings {
newOrder.Bookings = append(
newOrder.Bookings, scheduling.FromSchedulerObject(tools.BOOKING, b).(*booking.Booking))
}
res, _, err := order.NewAccessor(request).StoreOne(newOrder)
if err != nil {
return "", err
}
if _, err := bill.DraftFirstBill(res.(*order.Order), request); err != nil {
return err
}
return nil
} else {
return err
return res.GetID(), err
}
return res.GetID(), nil
}
func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*WorkflowSchedule, *workflow.Workflow, []*workflow_execution.WorkflowExecution, error) {
if request == nil {
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no request found")
}
c := request.Caller
if c == nil || c.URLS == nil || c.URLS[tools.BOOKING] == nil {
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no caller defined")
}
methods := c.URLS[tools.BOOKING]
if _, ok := methods[tools.GET]; !ok {
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no path found")
}
ok, wf, executions, purchases, bookings, err := ws.GetBuyAndBook(wfID, request)
ws.WorkflowExecution = executions
if !ok || err != nil {
return ws, nil, executions, errors.New("could not book the workflow : " + fmt.Sprintf("%v", err))
}
ws.Workflow = wf
// Resolve our own peer MongoDB-ID once; used to decide local vs NATS routing.
selfID, _ := oclib.GetMySelf()
errCh := make(chan error, len(purchases))
for _, purchase := range purchases {
purchase.IsDraft = true
go propagateResource(purchase, purchase.DestPeerID, tools.PURCHASE_RESOURCE, selfID, request, errCh)
}
for i := 0; i < len(purchases); i++ {
if err := <-errCh; err != nil {
return ws, wf, executions, errors.New("could not propagate purchase: " + fmt.Sprintf("%v", err))
}
}
// If the client provides a scheduling_id from a Check session, confirm the
// pre-created drafts (bookings/purchases). Executions already exist as drafts
// and will be confirmed later by the considers mechanism.
if ws.UUID != "" {
adminReq := &tools.APIRequest{Admin: true}
errCh = make(chan error, len(bookings))
for _, bk := range bookings {
bk.IsDraft = true
go propagateResource(bk, bk.DestPeerID, tools.BOOKING, selfID, request, errCh)
}
for i := 0; i < len(bookings); i++ {
if err := <-errCh; err != nil {
return ws, wf, executions, errors.New("could not propagate booking: " + fmt.Sprintf("%v", err))
}
}
if err := ws.GenerateOrder(purchases, bookings, request); err != nil {
return ws, wf, executions, err
}
fmt.Println("Schedules")
// Obsolescence check: abort if any session execution's start date has passed.
executions := loadSessionExecs(ws.UUID)
for _, exec := range executions {
err := exec.PurgeDraft(request)
if err != nil {
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("purge draft" + fmt.Sprintf("%v", err))
if !exec.ExecDate.IsZero() && exec.ExecDate.Before(time.Now()) {
return ws, nil, nil, fmt.Errorf("execution %s is obsolete (start date in the past)", exec.GetID())
}
exec.StoreDraftDefault()
utils.GenericStoreOne(exec, workflow_execution.NewAccessor(request))
go EmitConsidersExecution(exec, wf)
}
fmt.Println("Schedules")
wf.GetAccessor(&tools.APIRequest{Admin: true}).UpdateOne(wf.Serialize(wf), wf.GetID())
if err := ConfirmSession(ws.UUID, selfID, request); err != nil {
return ws, nil, []*workflow_execution.WorkflowExecution{}, fmt.Errorf("confirm session failed: %w", err)
}
for _, exec := range executions {
go WatchExecDeadline(exec.GetID(), exec.ExecDate, selfID, request)
}
obj, _, _ := workflow.NewAccessor(request).LoadOne(wfID)
if obj == nil {
return ws, nil, executions, nil
}
wf := obj.(*workflow.Workflow)
ws.Workflow = wf
ws.WorkflowExecution = executions
wf.GetAccessor(adminReq).UpdateOne(wf.Serialize(wf), wf.GetID())
return ws, wf, executions, nil
}
// Schedule must be called from a Check session (ws.UUID set above).
// Direct scheduling without a prior Check session is not supported.
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no scheduling session: use the Check stream first")
}
// propagateResource routes a purchase or booking to its destination:
@@ -210,14 +202,12 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
// - Otherwise a NATS CREATE_RESOURCE message is emitted so the destination
// peer can process it asynchronously.
//
// The caller is responsible for setting obj.IsDraft = true before calling.
// The caller is responsible for setting obj.IsDraft before calling.
func propagateResource(obj utils.DBObject, destPeerID string, dt tools.DataType, selfMongoID *peer.Peer, request *tools.APIRequest, errCh chan error) {
if selfMongoID == nil {
return
} // booking or purchase
if destPeerID == selfMongoID.GetID() {
if _, _, err := obj.GetAccessor(request).StoreOne(obj); err != nil {
errCh <- fmt.Errorf("could not store %s locally: %w", dt.String(), err)
stored := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).StoreOne(obj.Serialize(obj))
if stored.Err != "" || stored.Data == nil {
errCh <- fmt.Errorf("could not store %s locally: %s", dt.String(), stored.Err)
return
}
// The planner tracks booking time-slots only; purchases do not affect it.
@@ -227,17 +217,32 @@ func propagateResource(obj utils.DBObject, destPeerID string, dt tools.DataType,
errCh <- nil
return
}
payload, err := json.Marshal(obj)
m := obj.Serialize(obj)
if m["dest_peer_id"] != nil {
if data := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).LoadOne(fmt.Sprintf("%v", m["dest_peer_id"])); data.Data != nil {
m["peer_id"] = data.Data.(*peer.Peer).PeerID
}
} else {
fmt.Println("NO DEST ID")
return
}
payload, err := json.Marshal(m)
if err != nil {
errCh <- fmt.Errorf("could not serialize %s: %w", dt.String(), err)
return
}
tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
if b, err := json.Marshal(&tools.PropalgationMessage{
DataType: dt.EnumIndex(),
Action: tools.PB_CREATE,
Payload: payload,
}); err == nil {
tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
FromApp: "oc-scheduler",
Datatype: dt,
Method: int(tools.CREATE_RESOURCE),
Payload: payload,
Method: int(tools.PROPALGATION_EVENT),
Payload: b,
})
}
errCh <- nil
}
@@ -335,303 +340,3 @@ type Schedule struct {
* TODO : LARGEST GRAIN PLANIFYING THE WORKFLOW WHEN OPTION IS SET
* SET PROTECTION BORDER TIME
*/
// ---------------------------------------------------------------------------
// Slot availability check
// ---------------------------------------------------------------------------
// Bounds of the forward scan for a free slot: Check passes checkWindowHours
// to findNextSlot as the scan window, and findNextSlot advances candidate
// start times by checkStepMin.
const (
checkWindowHours = 5 // how far ahead to scan for a free slot (hours)
checkStepMin = 15 // time increment per scan step (minutes)
)
// CheckResult holds the outcome of a slot availability check.
type CheckResult struct {
// Available is true when no resource was reported unavailable, or when the
// check ran in preemption mode.
Available bool `json:"available"`
// Start is the start time that was actually checked.
Start time.Time `json:"start"`
// End is the end time that was actually checked; it is nil when no end
// could be resolved.
End *time.Time `json:"end,omitempty"`
// NextSlot is the nearest free slot found within checkWindowHours when
// the requested slot is unavailable, or the preferred (conflict-free) slot
// when running in preemption mode.
NextSlot *time.Time `json:"next_slot,omitempty"`
// Warnings carries the human-readable messages produced while checking
// each resource.
Warnings []string `json:"warnings,omitempty"`
// Preemptible is true when the check was run in preemption mode.
Preemptible bool `json:"preemptible,omitempty"`
}
// bookingResource is the minimum info needed to verify a resource against the
// planner cache.
type bookingResource struct {
// id is the resource ID (res.GetID() in collectBookingResources).
id string
// peerID is the resource creator ID (res.GetCreatorID()); it is the key
// used to look up PlannerCache in checkResourceAvailability.
peerID string
instanceID string // resolved from WorkflowSchedule.SelectedInstances
}
// Check verifies that all booking-relevant resources (storage and compute) of
// the given workflow have capacity for the requested time slot.
//
//   - asap=true → ignore ws.Start, begin searching from time.Now()
//   - preemption → always return Available=true but populate Warnings with
//     conflicts and NextSlot with the nearest conflict-free alternative
//
// The end of the slot is ws.End when set, otherwise start + ws.DurationS,
// otherwise start + the duration estimated by Planify; it stays nil when none
// of these is available. An error is returned only when the workflow cannot
// be loaded.
func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) {
	// 1. Load workflow
	obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
	if code != 200 || err != nil {
		msg := "could not load workflow " + wfID
		if err != nil {
			msg += ": " + err.Error()
		}
		return nil, errors.New(msg)
	}
	// Guard the assertion: a nil or unexpected object must not panic the caller.
	wf, ok := obj.(*workflow.Workflow)
	if !ok || wf == nil {
		return nil, errors.New("could not load workflow " + wfID)
	}
	// 2. Resolve start
	start := ws.Start
	if asap || start.IsZero() {
		start = time.Now()
	}
	// 3. Resolve end: use the explicit end/duration, or estimate via Planify
	end := ws.End
	if end == nil {
		if ws.DurationS > 0 {
			e := start.Add(time.Duration(ws.DurationS * float64(time.Second)))
			end = &e
		} else {
			_, longest, _, _, planErr := wf.Planify(
				start, nil,
				ws.SelectedInstances, ws.SelectedPartnerships,
				ws.SelectedBuyings, ws.SelectedStrategies,
				int(ws.BookingMode), request,
			)
			if planErr == nil && longest > 0 {
				// Scale before converting so fractional seconds are kept,
				// exactly like the DurationS branch above.
				e := start.Add(time.Duration(longest * float64(time.Second)))
				end = &e
			}
		}
	}
	// 4. Extract booking-relevant (storage + compute) resources from the graph,
	// resolving the selected instance for each resource.
	checkables := collectBookingResources(wf, ws.SelectedInstances)
	fmt.Println(checkables)
	// 5. Check every resource against its peer's planner
	unavailable, warnings := checkResourceAvailability(checkables, start, end)
	fmt.Println(unavailable, warnings)
	result := &CheckResult{
		Start:    start,
		End:      end,
		Warnings: warnings,
	}
	// 6. Preemption mode: mark as schedulable regardless of conflicts, but
	// surface warnings and the nearest conflict-free alternative.
	if preemption {
		result.Available = true
		result.Preemptible = true
		if len(unavailable) > 0 {
			result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
		}
		return result, nil
	}
	// 7. All resources are free
	if len(unavailable) == 0 {
		result.Available = true
		return result, nil
	}
	// 8. Slot unavailable: locate the nearest free slot within the window
	result.Available = false
	result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
	return result, nil
}
// collectBookingResources lists the storage resources, then the compute
// resources, of the workflow graph, each resource ID appearing once.
// Resources without a creator ID are ignored. For every retained resource the
// instance ID is resolved from selectedInstances (the scheduler's
// SelectedInstances ConfigItem) so the planner check targets the instance
// chosen by the user; it is left empty when no instance can be resolved.
// A workflow without a graph yields nil.
func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) []bookingResource {
	if wf.Graph == nil {
		return nil
	}
	// owned is the subset of the resource API needed here.
	type owned interface {
		GetID() string
		GetCreatorID() string
	}
	// instanceOf resolves the selected instance for storage and compute
	// resources; any other concrete type resolves to "".
	instanceOf := func(res owned) string {
		idx := selectedInstances.Get(res.GetID())
		switch concrete := res.(type) {
		case *resources.StorageResource:
			if inst := concrete.GetSelectedInstance(idx); inst != nil {
				return inst.GetID()
			}
		case *resources.ComputeResource:
			if inst := concrete.GetSelectedInstance(idx); inst != nil {
				return inst.GetID()
			}
		}
		return ""
	}
	var collected []bookingResource
	known := map[string]bool{}
	// register appends res unless it has no creator or was already collected.
	register := func(res owned) {
		resID, creator := res.GetID(), res.GetCreatorID()
		if creator == "" || known[resID] {
			return
		}
		known[resID] = true
		collected = append(collected, bookingResource{
			id:         resID,
			peerID:     creator,
			instanceID: instanceOf(res),
		})
	}
	for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
		node := item
		if _, res := node.GetResource(); res != nil {
			register(res)
		}
	}
	for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
		node := item
		if _, res := node.GetResource(); res != nil {
			register(res)
		}
	}
	return collected
}
// checkResourceAvailability checks each resource against the cached planner of
// its peer for the slot [start, end]. It returns the IDs of the resources that
// are not available together with human-readable warnings. A resource whose
// peer has no planner in the cache only produces a warning and is treated as
// available.
//
// NOTE(review): the cache is indexed here with the resource creator ID, while
// broadcastPlanner indexes it with the PeerID of the peer loaded from that
// creator ID — confirm both identifiers are the same key space.
func checkResourceAvailability(res []bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) {
	for _, candidate := range res {
		plannerMu.RLock()
		entry := PlannerCache[candidate.peerID]
		plannerMu.RUnlock()
		switch {
		case entry == nil || entry.Planner == nil:
			warnings = append(warnings, fmt.Sprintf(
				"peer %s planner not in cache for resource %s assuming available", candidate.peerID, candidate.id))
		case !checkInstance(entry.Planner, candidate.id, candidate.instanceID, start, end):
			unavailable = append(unavailable, candidate.id)
			warnings = append(warnings, fmt.Sprintf(
				"resource %s is not available in [%s %s]",
				candidate.id, start.Format(time.RFC3339), formatOptTime(end)))
		}
	}
	return unavailable, warnings
}
// checkInstance reports whether resourceID can be used during [start, end].
// With a non-empty instanceID only that instance is checked. With an empty
// instanceID every instance recorded in p.Capacities for the resource is
// tried and the first one with free capacity wins; when nothing is recorded
// for the resource it is considered free.
func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool {
	if instanceID == "" {
		recorded := p.Capacities[resourceID]
		if len(recorded) == 0 {
			// no recorded usage → assume free
			return true
		}
		for candidate := range recorded {
			if p.Check(resourceID, candidate, nil, start, end) {
				return true
			}
		}
		return false
	}
	return p.Check(resourceID, instanceID, nil, start, end)
}
// findNextSlot looks for the first start time after 'from' at which every
// resource is free. Candidates are from+step, from+2*step, ... (step being
// checkStepMin minutes) strictly before from+windowH hours. Each candidate is
// tested over the duration originalEnd-from, or one hour when originalEnd is
// nil or not after 'from'. It returns nil when no candidate fits.
func findNextSlot(resources []bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time {
	span := time.Hour
	if originalEnd != nil {
		if requested := originalEnd.Sub(from); requested > 0 {
			span = requested
		}
	}
	stride := time.Duration(checkStepMin) * time.Minute
	deadline := from.Add(time.Duration(windowH) * time.Hour)
	candidate := from.Add(stride)
	for candidate.Before(deadline) {
		stop := candidate.Add(span)
		busy, _ := checkResourceAvailability(resources, candidate, &stop)
		if len(busy) == 0 {
			found := candidate
			return &found
		}
		candidate = candidate.Add(stride)
	}
	return nil
}
// formatOptTime renders an optional time in RFC 3339 form, or "open" when the
// time is absent (nil).
func formatOptTime(t *time.Time) string {
	if t != nil {
		return t.Format(time.RFC3339)
	}
	return "open"
}
// GetWorkflowPeerIDs loads the workflow and gathers the distinct creator IDs
// of its storage resources, then of its compute resources. Each creator ID is
// then loaded as a peer record and the PeerID of every record found is
// returned, in that order. These are the peers whose planners must be watched
// by a check stream.
//
// It returns an error when the workflow cannot be loaded, and (nil, nil) when
// the workflow has no graph.
func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) {
	obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
	if code != 200 || err != nil {
		msg := "could not load workflow " + wfID
		if err != nil {
			msg += ": " + err.Error()
		}
		return nil, errors.New(msg)
	}
	wf := obj.(*workflow.Workflow)
	if wf.Graph == nil {
		return nil, nil
	}
	var creatorIDs []string
	known := map[string]bool{}
	// remember records the creator of a resource once, skipping empty IDs.
	remember := func(owner interface{ GetCreatorID() string }) {
		creator := owner.GetCreatorID()
		if creator == "" || known[creator] {
			return
		}
		known[creator] = true
		creatorIDs = append(creatorIDs, creator)
	}
	for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
		node := item
		if _, res := node.GetResource(); res != nil {
			remember(res)
		}
	}
	for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
		node := item
		if _, res := node.GetResource(); res != nil {
			remember(res)
		}
	}
	// Translate creator IDs into the PeerID carried by each peer record;
	// creators with no record are dropped.
	realPeersID := []string{}
	peers := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
	for _, creator := range creatorIDs {
		record := peers.LoadOne(creator)
		if record.Data == nil {
			continue
		}
		realPeersID = append(realPeersID, record.ToPeer().PeerID)
	}
	return realPeersID, nil
}

View File

@@ -0,0 +1,142 @@
package scheduling
import (
"encoding/json"
"cloud.o-forge.io/core/oc-lib/models/booking"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/tools"
)
// SchedulerObject is the common view the scheduler has of a booking or a
// purchase: a utils.DBObject extended with the session-related accessors
// below. ScheduledBooking and ScheduledPurchase implement it.
type SchedulerObject interface {
utils.DBObject
// SetIsDraft sets the draft flag of the underlying object.
SetIsDraft(bool)
// GetKey returns "<resourceID>/<instanceID>/<datatype>".
GetKey() string
// SetSchedulerPeerID records the peer ID of the scheduler.
SetSchedulerPeerID(peerID string)
// SetExecutionsID records the executions (session) ID.
SetExecutionsID(ei string)
// GetDestPeer returns the destination peer ID.
GetDestPeer() string
// GetPeerSession returns the scheduler peer ID.
GetPeerSession() string
// GetExecutionsId returns the executions (session) ID.
GetExecutionsId() string
// GetExecutionId returns the ID of the single execution.
GetExecutionId() string
}
// ScheduledPurchase embeds purchase_resource.PurchaseResource and carries the
// extra methods required by SchedulerObject.
type ScheduledPurchase struct {
purchase_resource.PurchaseResource
}
// ScheduledBooking embeds booking.Booking and carries the extra methods
// required by SchedulerObject.
type ScheduledBooking struct {
booking.Booking
}
// FromSchedulerDBObject converts a scheduler wrapper back into the concrete
// model (*booking.Booking or *purchase_resource.PurchaseResource) selected by
// dt, through a JSON round trip. Marshal/unmarshal errors are ignored, so the
// result may be partially filled. Any other data type yields nil.
func FromSchedulerDBObject(dt tools.DataType, obj SchedulerObject) utils.DBObject {
	if dt == tools.BOOKING {
		target := &booking.Booking{}
		raw, _ := json.Marshal(obj)
		json.Unmarshal(raw, &target)
		return target
	}
	if dt == tools.PURCHASE_RESOURCE {
		target := &purchase_resource.PurchaseResource{}
		raw, _ := json.Marshal(obj)
		json.Unmarshal(raw, &target)
		return target
	}
	return nil
}
// FromSchedulerObject is the utils.ShallowDBObject flavour of the conversion:
// it rebuilds the concrete model (*booking.Booking or
// *purchase_resource.PurchaseResource) selected by dt from obj through a JSON
// round trip. Marshal/unmarshal errors are ignored, so the result may be
// partially filled. Any other data type yields nil.
func FromSchedulerObject(dt tools.DataType, obj SchedulerObject) utils.ShallowDBObject {
	if dt == tools.BOOKING {
		target := &booking.Booking{}
		raw, _ := json.Marshal(obj)
		json.Unmarshal(raw, &target)
		return target
	}
	if dt == tools.PURCHASE_RESOURCE {
		target := &purchase_resource.PurchaseResource{}
		raw, _ := json.Marshal(obj)
		json.Unmarshal(raw, &target)
		return target
	}
	return nil
}
// ToSchedulerObject wraps a model object into the scheduler type selected by
// dt (*ScheduledBooking or *ScheduledPurchase) through a JSON round trip.
// Marshal/unmarshal errors are ignored, so the result may be partially
// filled. Any other data type yields nil.
func ToSchedulerObject(dt tools.DataType, obj utils.ShallowDBObject) SchedulerObject {
	if dt == tools.BOOKING {
		wrapped := &ScheduledBooking{}
		raw, _ := json.Marshal(obj)
		json.Unmarshal(raw, &wrapped)
		return wrapped
	}
	if dt == tools.PURCHASE_RESOURCE {
		wrapped := &ScheduledPurchase{}
		raw, _ := json.Marshal(obj)
		json.Unmarshal(raw, &wrapped)
		return wrapped
	}
	return nil
}
// GetExecutionId returns the booking's ExecutionID.
func (b *ScheduledBooking) GetExecutionId() string {
return b.ExecutionID
}
// GetExecutionId returns the purchase's ExecutionID.
func (b *ScheduledPurchase) GetExecutionId() string {
return b.ExecutionID
}
// GetExecutionsId returns the booking's ExecutionsID.
func (b *ScheduledBooking) GetExecutionsId() string {
return b.ExecutionsID
}
// GetExecutionsId returns the purchase's ExecutionsID.
func (b *ScheduledPurchase) GetExecutionsId() string {
return b.ExecutionsID
}
// GetPeerSession returns the booking's SchedulerPeerID.
func (b *ScheduledBooking) GetPeerSession() string {
return b.SchedulerPeerID
}
// GetPeerSession returns the purchase's SchedulerPeerID.
func (b *ScheduledPurchase) GetPeerSession() string {
return b.SchedulerPeerID
}
// GetDestPeer returns the booking's DestPeerID.
func (b *ScheduledBooking) GetDestPeer() string {
return b.DestPeerID
}
// GetDestPeer returns the purchase's DestPeerID.
func (b *ScheduledPurchase) GetDestPeer() string {
return b.DestPeerID
}
// GetKey returns "<ResourceID>/<InstanceID>/<booking datatype name>".
func (b *ScheduledBooking) GetKey() string {
return b.ResourceID + "/" + b.InstanceID + "/" + tools.BOOKING.String()
}
// GetKey returns "<ResourceID>/<InstanceID>/<purchase datatype name>".
func (b *ScheduledPurchase) GetKey() string {
return b.ResourceID + "/" + b.InstanceID + "/" + tools.PURCHASE_RESOURCE.String()
}
// SetIsDraft sets the booking's IsDraft flag.
func (b *ScheduledBooking) SetIsDraft(ok bool) {
b.IsDraft = ok
}
// SetIsDraft sets the purchase's IsDraft flag.
func (b *ScheduledPurchase) SetIsDraft(ok bool) {
b.IsDraft = ok
}
// SetSchedulerPeerID sets the booking's SchedulerPeerID.
func (b *ScheduledBooking) SetSchedulerPeerID(peerID string) {
b.SchedulerPeerID = peerID
}
// SetSchedulerPeerID sets the purchase's SchedulerPeerID.
func (b *ScheduledPurchase) SetSchedulerPeerID(peerID string) {
b.SchedulerPeerID = peerID
}
// SetExecutionsID sets the booking's ExecutionsID.
func (b *ScheduledBooking) SetExecutionsID(ei string) {
b.ExecutionsID = ei
}
// SetExecutionsID sets the purchase's ExecutionsID.
func (b *ScheduledPurchase) SetExecutionsID(ei string) {
b.ExecutionsID = ei
}

345
infrastructure/session.go Normal file
View File

@@ -0,0 +1,345 @@
package infrastructure
import (
"encoding/json"
"fmt"
"oc-scheduler/infrastructure/scheduling"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/dbs"
"cloud.o-forge.io/core/oc-lib/models/booking"
"cloud.o-forge.io/core/oc-lib/models/order"
"cloud.o-forge.io/core/oc-lib/models/peer"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
"cloud.o-forge.io/core/oc-lib/tools"
)
// removeResourcePayload is sent via NATS REMOVE_RESOURCE so the receiver can
// verify the delete order comes from the original scheduler session.
type removeResourcePayload struct {
// ID is presumably the ID of the object to remove — its use is outside
// this view, TODO confirm.
ID string `json:"id"`
// SchedulerPeerID and ExecutionsID presumably identify the scheduler
// session that issued the delete order — TODO confirm against the receiver.
SchedulerPeerID string `json:"scheduler_peer_id"`
ExecutionsID string `json:"executions_id"`
}
// ---------------------------------------------------------------------------
// DB helpers — objects are found via executions_id
// ---------------------------------------------------------------------------
// sessionIDFilter builds a filter with a single AND clause requiring field to
// be equal to id.
func sessionIDFilter(field, id string) *dbs.Filters {
	match := dbs.Filter{Operator: dbs.EQUAL.String(), Value: id}
	return &dbs.Filters{
		And: map[string][]dbs.Filter{field: {match}},
	}
}
// loadSession searches, with an admin request, the objects of data type dt
// whose "executions_id" equals executionsID and returns them wrapped as
// scheduler objects (see scheduling.ToSchedulerObject). The result is never
// nil; it is empty when nothing matches.
func loadSession(executionsID string, dt tools.DataType) []scheduling.SchedulerObject {
	found := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).Search(
		sessionIDFilter("executions_id", executionsID), "", true)
	session := make([]scheduling.SchedulerObject, 0, len(found.Data))
	for _, stored := range found.Data {
		wrapped := scheduling.ToSchedulerObject(dt, stored)
		session = append(session, wrapped)
	}
	return session
}
// loadSessionExecs searches, with an admin request, the workflow executions
// whose "executions_id" equals executionsID. Search errors are ignored and
// results of any other concrete type are skipped. The result is never nil.
func loadSessionExecs(executionsID string) []*workflow_execution.WorkflowExecution {
	accessor := workflow_execution.NewAccessor(&tools.APIRequest{Admin: true})
	found, _, _ := accessor.Search(
		sessionIDFilter("executions_id", executionsID), "", true)
	execs := make([]*workflow_execution.WorkflowExecution, 0, len(found))
	for _, candidate := range found {
		exec, isExec := candidate.(*workflow_execution.WorkflowExecution)
		if !isExec {
			continue
		}
		execs = append(execs, exec)
	}
	return execs
}
// loadSessionOrder searches, with an admin request, the orders whose
// "executions_id" equals executionsID and returns the first result that is an
// *order.Order, or nil when there is none. Search errors are ignored.
func loadSessionOrder(executionsID string) *order.Order {
	accessor := order.NewAccessor(&tools.APIRequest{Admin: true})
	found, _, _ := accessor.Search(
		sessionIDFilter("executions_id", executionsID), "", true)
	for _, candidate := range found {
		sessionOrder, isOrder := candidate.(*order.Order)
		if isOrder {
			return sessionOrder
		}
	}
	return nil
}
// ---------------------------------------------------------------------------
// Session upsert
// ---------------------------------------------------------------------------
// UpsertSessionDrafts creates or updates draft bookings/purchases/executions for a
// Check session. Existing objects are found via the DB (executions_id).
// Called on first successful check and on user date changes.
//
//   - bookings/purchases: upserted by key (see SchedulerObject.GetKey); objects
//     of the session that are no longer produced by the plan are deleted
//   - executions: replaced on every call (dates may have changed)
//   - order: created once; on subsequent calls its purchases and bookings are
//     replaced by the current ones
//
// Nothing is returned: planning and order-creation failures are logged and a
// nil selfID aborts the call.
func (ws *WorkflowSchedule) UpsertSessionDrafts(wfID, executionsID string, selfID *peer.Peer, request *tools.APIRequest) {
	if selfID == nil {
		// Every routing decision below compares against the local peer.
		fmt.Println("UpsertSessionDrafts: self peer unknown, nothing upserted")
		return
	}
	_, _, execs, purchases, bookings, err := ws.GetBuyAndBook(wfID, request)
	if err != nil {
		fmt.Println("UpsertSessionDrafts: could not plan workflow:", err)
		return
	}
	adminReq := &tools.APIRequest{Admin: true}
	// --- bookings, then purchases ---
	// Each data type gets its own existing/seen maps so that stale objects of
	// one type are never deleted a second time under the other data type.
	for _, batch := range []struct {
		dt    tools.DataType
		datas []scheduling.SchedulerObject
	}{
		{tools.BOOKING, bookings},
		{tools.PURCHASE_RESOURCE, purchases},
	} {
		existing := map[string]scheduling.SchedulerObject{}
		for _, prev := range loadSession(executionsID, batch.dt) {
			existing[prev.GetKey()] = prev
		}
		seen := map[string]bool{}
		upsertSessionDrafts(batch.dt, batch.datas, existing, seen, selfID, executionsID, request)
		for key, prev := range existing {
			if !seen[key] {
				deleteScheduling(batch.dt, prev, selfID, request)
			}
		}
	}
	// --- executions: replace on every call (dates may have changed) ---
	for _, old := range loadSessionExecs(executionsID) {
		UnregisterExecLock(old.GetID())
		workflow_execution.NewAccessor(adminReq).DeleteOne(old.GetID())
	}
	for _, exec := range execs {
		exec.ExecutionsID = executionsID
		exec.IsDraft = true
		ex, _, err := utils.GenericStoreOne(exec, workflow_execution.NewAccessor(adminReq))
		if err == nil {
			RegisterExecLock(ex.GetID())
			go WatchExecDeadline(ex.GetID(), exec.ExecDate, selfID, request)
		}
	}
	// --- order: create once, update on subsequent calls ---
	draftOrder := loadSessionOrder(executionsID)
	if draftOrder == nil {
		if _, err := ws.GenerateOrder(purchases, bookings, executionsID, request); err != nil {
			fmt.Println("UpsertSessionDrafts: could not generate order:", err)
		}
		return
	}
	// Rebuild both lists from the current plan instead of appending to the
	// stored ones, otherwise every call would add the same entries again.
	draftOrder.Purchases = make([]*purchase_resource.PurchaseResource, 0, len(purchases))
	for _, purch := range purchases {
		draftOrder.Purchases = append(
			draftOrder.Purchases, scheduling.FromSchedulerObject(tools.PURCHASE_RESOURCE, purch).(*purchase_resource.PurchaseResource))
	}
	draftOrder.Bookings = make([]*booking.Booking, 0, len(bookings))
	for _, b := range bookings {
		draftOrder.Bookings = append(
			draftOrder.Bookings, scheduling.FromSchedulerObject(tools.BOOKING, b).(*booking.Booking))
	}
	utils.GenericRawUpdateOne(draftOrder, draftOrder.GetID(), order.NewAccessor(adminReq))
}
// ---------------------------------------------------------------------------
// Session lifecycle
// ---------------------------------------------------------------------------
// upsertSessionDrafts writes every object of datas for the session
// executionsID. Each object is stamped with the scheduler peer ID and the
// executions ID, and its key is recorded in seen. An object whose key is in
// existing reuses the stored ID and is sent through propagateWriteResource;
// any other object is created through propagateResource.
//
// NOTE(review): objects that already exist are written with IsDraft=false,
// which is also what ConfirmSession does — confirm this is intended for a
// draft upsert.
func upsertSessionDrafts(dt tools.DataType, datas []scheduling.SchedulerObject, existing map[string]scheduling.SchedulerObject,
	seen map[string]bool, selfID *peer.Peer,
	executionsID string, request *tools.APIRequest) {
	fmt.Println("UpsertSessionDrafts", len(datas), len(existing))
	for _, bk := range datas {
		bk.SetSchedulerPeerID(selfID.PeerID)
		bk.SetExecutionsID(executionsID)
		seen[bk.GetKey()] = true
		if prev, ok := existing[bk.GetKey()]; ok {
			bk.SetID(prev.GetID())
			bk.SetIsDraft(false)
			// Convert to concrete type (Booking/PurchaseResource) so that
			// GenericRawUpdateOne serializes the real struct, not the wrapper.
			propagateWriteResource(
				scheduling.FromSchedulerDBObject(dt, bk), bk.GetDestPeer(), dt, selfID, request)
			continue
		}
		errCh := make(chan error, 1)
		propagateResource(scheduling.FromSchedulerDBObject(dt, bk), bk.GetDestPeer(), dt, selfID, request, errCh)
		// propagateResource runs synchronously and, on some paths, returns
		// without reporting on errCh. The channel is buffered, so a result is
		// already there if one was sent; a blocking receive would hang
		// forever on the silent paths.
		select {
		case err := <-errCh:
			if err != nil {
				fmt.Println("UpsertSessionDrafts: could not propagate", bk.GetKey(), ":", err)
			}
		default:
		}
	}
}
// CleanupSession tears down a session (called when the WebSocket closes
// without a confirm): every execution of the session is unscheduled through
// UnscheduleExecution and deleted, then the session order, if any, is deleted.
// Deletions use an admin request. The self parameter is not used.
func CleanupSession(self *peer.Peer, executionsID string, selfID *peer.Peer, request *tools.APIRequest) {
	admin := &tools.APIRequest{Admin: true}
	for _, draft := range loadSessionExecs(executionsID) {
		execID := draft.GetID()
		UnscheduleExecution(execID, selfID, request)
		workflow_execution.NewAccessor(admin).DeleteOne(execID)
	}
	pending := loadSessionOrder(executionsID)
	if pending == nil {
		return
	}
	order.NewAccessor(admin).DeleteOne(pending.GetID())
}
// ConfirmSession marks every booking, then every purchase, of the session as
// no longer draft (IsDraft=false) and sends each one to its destination peer
// through propagateWriteResource. The considers mechanism then transitions
// executions to IsDraft=false once all remote peers acknowledge.
// It always returns nil.
func ConfirmSession(executionsID string, selfID *peer.Peer, request *tools.APIRequest) error {
	confirm := func(dt tools.DataType) {
		for _, draft := range loadSession(executionsID, dt) {
			draft.SetIsDraft(false)
			propagateWriteResource(
				scheduling.FromSchedulerDBObject(dt, draft), draft.GetDestPeer(), dt, selfID, request)
		}
	}
	confirm(tools.BOOKING)
	confirm(tools.PURCHASE_RESOURCE)
	return nil
}
// confirmSessionOrder clears the draft flag on the session's order and
// persists the change. Per the original note it is meant to run once all
// considers have been received.
//
// It does nothing when loadSessionOrder finds no order for executionsID.
// A failed update is logged instead of being dropped silently, so a session
// whose order stays in draft can be diagnosed.
func confirmSessionOrder(executionsID string, adminReq *tools.APIRequest) {
	o := loadSessionOrder(executionsID)
	if o == nil {
		return
	}
	o.IsDraft = false
	if _, _, err := utils.GenericRawUpdateOne(o, o.GetID(), order.NewAccessor(adminReq)); err != nil {
		fmt.Printf("confirmSessionOrder: update failed for order %s (session %s): %v\n", o.GetID(), executionsID, err)
	}
}
// ---------------------------------------------------------------------------
// Propagation
// ---------------------------------------------------------------------------
// propagateWriteResource routes a booking/purchase write to its destination:
//   - local peer (destPeerID equals selfID.GetID()): the object is updated in
//     the local DB; for bookings the self planner is refreshed in the
//     background; when the object is no longer a draft, a considers payload
//     carrying its ID is handed to updateExecutionState in the background;
//   - any other peer: the object is JSON-encoded and published on NATS as a
//     CREATE_RESOURCE event (the receiver is expected to upsert it).
//
// The function returns nothing, so every failure (local update, JSON
// encoding of either payload) is logged and the write is abandoned.
func propagateWriteResource(obj utils.DBObject, destPeerID string, dt tools.DataType, selfID *peer.Peer, request *tools.APIRequest) {
	if destPeerID != selfID.GetID() {
		payload, err := json.Marshal(obj)
		if err != nil {
			fmt.Printf("propagateWriteResource: cannot encode %s %s for peer %s: %v\n", dt, obj.GetID(), destPeerID, err)
			return
		}
		tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
			FromApp:  "oc-scheduler",
			Datatype: dt,
			Method:   int(tools.CREATE_RESOURCE),
			Payload:  payload,
		})
		return
	}

	if _, _, err := utils.GenericRawUpdateOne(obj, obj.GetID(), obj.GetAccessor(request)); err != nil {
		fmt.Printf("propagateWriteResource: local update failed for %s %s: %v\n", dt, obj.GetID(), err)
		return
	}
	if dt == tools.BOOKING {
		go refreshSelfPlanner(selfID.PeerID, request)
	}
	fmt.Println("IS DRAFTED", obj.IsDrafted())
	if obj.IsDrafted() {
		return
	}
	// Confirmed object: notify the execution state machine.
	payload, err := json.Marshal(&executionConsidersPayload{
		ID: obj.GetID(),
	})
	if err != nil {
		fmt.Printf("propagateWriteResource: cannot encode considers payload for %s %s: %v\n", dt, obj.GetID(), err)
		return
	}
	go updateExecutionState(payload, dt)
}
// deleteScheduling removes a scheduling object of data type dt from its
// destination peer. When the destination is another peer, a REMOVE_RESOURCE
// event is emitted through emitNATSRemove. When the destination is this peer
// (bk.GetDestPeer() equals selfID.GetID()), the object is deleted from the
// local DB with an admin request and the self planner is refreshed in the
// background.
func deleteScheduling(dt tools.DataType, bk scheduling.SchedulerObject, selfID *peer.Peer, request *tools.APIRequest) {
	if bk.GetDestPeer() != selfID.GetID() {
		emitNATSRemove(bk.GetID(), bk.GetPeerSession(), bk.GetExecutionsId(), dt)
		return
	}
	oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).DeleteOne(bk.GetID())
	go refreshSelfPlanner(selfID.PeerID, request)
}
// emitNATSRemove publishes a REMOVE_RESOURCE event on NATS for the object id
// of data type dt. The payload also carries the scheduler peer ID and the
// executions ID so that, per the original note, the receiver can verify the
// delete is legitimate.
func emitNATSRemove(id, schedulerPeerID, executionsID string, dt tools.DataType) {
	body := removeResourcePayload{
		ID:              id,
		SchedulerPeerID: schedulerPeerID,
		ExecutionsID:    executionsID,
	}
	// The encoding error is intentionally not inspected here.
	encoded, _ := json.Marshal(body)

	event := tools.NATSResponse{
		FromApp:  "oc-scheduler",
		Datatype: dt,
		Method:   int(tools.REMOVE_RESOURCE),
		Payload:  encoded,
	}
	tools.NewNATSCaller().SetNATSPub(tools.REMOVE_RESOURCE, event)
}
// ---------------------------------------------------------------------------
// Deadline watchers
// ---------------------------------------------------------------------------
// WatchExecDeadline arranges for purgeUnconfirmedExecution to run for
// executionID one minute before execDate, so that stale drafts do not keep
// resources blocked. When that instant is already in the past (for example
// after a process restart) the purge is started right away in its own
// goroutine; otherwise it is scheduled with time.AfterFunc.
func WatchExecDeadline(executionID string, execDate time.Time, selfID *peer.Peer, request *tools.APIRequest) {
	fmt.Println("WatchExecDeadline")
	purge := func() { purgeUnconfirmedExecution(executionID, selfID, request) }

	deadline := execDate.UTC().Add(-1 * time.Minute)
	if wait := time.Until(deadline); wait > 0 {
		time.AfterFunc(wait, purge)
		return
	}
	go purge()
}
// purgeUnconfirmedExecution unschedules executionID (which deletes its
// bookings and the execution) and then issues a delete for the execution
// through an admin accessor. The outcome of both steps is reported in a
// single log line that names the execution.
//
// The second delete is kept from the original code; it also covers the case
// where UnscheduleExecution returned early without deleting anything.
//
// NOTE(review): nothing here checks whether the execution is still a draft
// when the timer fires — confirm that callers only arm the watcher for
// executions that must be purged.
func purgeUnconfirmedExecution(executionID string, selfID *peer.Peer, request *tools.APIRequest) {
	acc := workflow_execution.NewAccessor(&tools.APIRequest{Admin: true})
	unscheduleErr := UnscheduleExecution(executionID, selfID, request)
	_, _, deleteErr := acc.DeleteOne(executionID)
	fmt.Printf("purgeUnconfirmedExecution: cleaned up resources for execution %s (unschedule error: %v, delete error: %v)\n",
		executionID, unscheduleErr, deleteErr)
}
// RecoverDraftExecutions is called at startup to restore deadline watchers
// for executions that survived a process restart. It first polls
// oclib.GetMySelf every five seconds until a self peer is available, then
// searches workflow executions (the final `true` argument is presumably the
// draft filter — confirm against the accessor's Search signature). For each
// result it registers the execution lock and starts WatchExecDeadline, which
// purges immediately when the deadline has already passed.
//
// A failed search is logged and aborts the recovery. The final log line
// counts only the executions for which a watcher was actually started.
func RecoverDraftExecutions() {
	adminReq := &tools.APIRequest{Admin: true}
	var selfID *peer.Peer
	for selfID == nil {
		// The lookup error is not inspected: the loop simply retries until
		// a self peer is returned.
		selfID, _ = oclib.GetMySelf()
		if selfID == nil {
			time.Sleep(5 * time.Second)
		}
	}
	results, _, err := workflow_execution.NewAccessor(adminReq).Search(nil, "*", true)
	if err != nil {
		fmt.Printf("RecoverDraftExecutions: search failed: %v\n", err)
		return
	}
	recovered := 0
	for _, obj := range results {
		exec, ok := obj.(*workflow_execution.WorkflowExecution)
		if !ok {
			continue
		}
		RegisterExecLock(exec.GetID())
		go WatchExecDeadline(exec.GetID(), exec.ExecDate, selfID, adminReq)
		recovered++
	}
	fmt.Printf("RecoverDraftExecutions: recovered %d draft executions\n", recovered)
}
// ---------------------------------------------------------------------------
// Unschedule
// ---------------------------------------------------------------------------
// UnscheduleExecution deletes all bookings referenced by an execution's
// PeerBookByGraph (locally or through NATS, see deleteScheduling), then
// deletes the execution itself and releases its execution lock.
//
// It returns an error when the execution cannot be loaded, is missing, or is
// not a *workflow_execution.WorkflowExecution. Bookings that fail to load are
// skipped. A failed delete of the execution is logged; the lock is released
// and nil is returned regardless, as before.
func UnscheduleExecution(executionID string, selfID *peer.Peer, request *tools.APIRequest) error {
	fmt.Println("UnscheduleExecution")
	adminReq := &tools.APIRequest{Admin: true}
	res, _, err := workflow_execution.NewAccessor(adminReq).LoadOne(executionID)
	if err != nil {
		return fmt.Errorf("execution %s not found: %w", executionID, err)
	}
	if res == nil {
		// No underlying error to wrap: %w with a nil operand would render
		// as "%!w(<nil>)".
		return fmt.Errorf("execution %s not found", executionID)
	}
	exec, ok := res.(*workflow_execution.WorkflowExecution)
	if !ok || exec == nil {
		return fmt.Errorf("execution %s: unexpected object type %T", executionID, res)
	}
	for _, byResource := range exec.PeerBookByGraph {
		for _, bookingIDs := range byResource {
			for _, bkID := range bookingIDs {
				bkRes, _, loadErr := booking.NewAccessor(adminReq).LoadOne(bkID)
				fmt.Println("UnscheduleExecution", bkID, loadErr)
				if loadErr != nil || bkRes == nil {
					continue
				}
				deleteScheduling(tools.BOOKING, scheduling.ToSchedulerObject(tools.BOOKING, bkRes), selfID, request)
			}
		}
	}
	if _, _, delErr := workflow_execution.NewAccessor(adminReq).DeleteOne(executionID); delErr != nil {
		fmt.Printf("UnscheduleExecution: delete failed for execution %s: %v\n", executionID, delErr)
	}
	UnregisterExecLock(executionID)
	return nil
}

View File

@@ -36,5 +36,6 @@ func main() {
go infrastructure.ListenNATS()
go infrastructure.InitSelfPlanner()
go infrastructure.ListenConfirm()
go infrastructure.RecoverDraftExecutions()
beego.Run()
}

Binary file not shown.

View File

@@ -133,15 +133,6 @@ func init() {
Filters: nil,
Params: nil})
beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
beego.ControllerComments{
Method: "Schedule",
Router: `/:id`,
AllowHTTPMethods: []string{"post"},
MethodParams: param.Make(),
Filters: nil,
Params: nil})
beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
beego.ControllerComments{
Method: "UnSchedule",
@@ -151,15 +142,6 @@ func init() {
Filters: nil,
Params: nil})
beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
beego.ControllerComments{
Method: "CheckStream",
Router: `/:id/check`,
AllowHTTPMethods: []string{"get"},
MethodParams: param.Make(),
Filters: nil,
Params: nil})
beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"] = append(beego.GlobalControllerRouter["oc-scheduler/controllers:WorkflowSchedulerController"],
beego.ControllerComments{
Method: "SearchScheduledDraftOrder",

View File

@@ -8,6 +8,7 @@
package routers
import (
"net/http"
"oc-scheduler/controllers"
beego "github.com/beego/beego/v2/server/web"
@@ -46,4 +47,8 @@ func init() {
)
beego.AddNamespace(ns)
// WebSocket route registered outside the Beego pipeline to avoid the
// spurious WriteHeader that prevents the 101 Switching Protocols upgrade.
beego.Handler("/oc/:id/check", http.HandlerFunc(controllers.CheckStreamHandler))
}

View File

@@ -260,6 +260,81 @@
}
}
},
"/verification/": {
"get": {
"tags": [
"verification"
],
"description": "find verification by id\n\u003cbr\u003e",
"operationId": "ExecutionVerificationController.GetAll",
"parameters": [
{
"in": "query",
"name": "is_draft",
"description": "draft wished",
"type": "string"
}
],
"responses": {
"200": {
"description": "{booking} models.booking"
}
}
}
},
"/verification/{id}": {
"get": {
"tags": [
"verification"
],
"description": "find verification by id\n\u003cbr\u003e",
"operationId": "ExecutionVerificationController.Get",
"parameters": [
{
"in": "path",
"name": "id",
"description": "the id you want to get",
"required": true,
"type": "string"
}
],
"responses": {
"200": {
"description": "{booking} models.booking"
}
}
},
"put": {
"tags": [
"verification"
],
"description": "create computes\n\u003cbr\u003e",
"operationId": "ExecutionVerificationController.Update",
"parameters": [
{
"in": "path",
"name": "id",
"description": "the compute id you want to get",
"required": true,
"type": "string"
},
{
"in": "body",
"name": "body",
"description": "The compute content",
"required": true,
"schema": {
"$ref": "#/definitions/models.compute"
}
}
],
"responses": {
"200": {
"description": "{compute} models.compute"
}
}
}
},
"/version/": {
"get": {
"tags": [
@@ -289,100 +364,29 @@
}
},
"/{id}": {
"post": {
"tags": [
"oc-scheduler/controllersWorkflowSchedulerController"
],
"description": "schedule workflow\n\u003cbr\u003e",
"operationId": "WorkflowSchedulerController.Schedule",
"parameters": [
{
"in": "path",
"name": "id",
"description": "id execution",
"required": true,
"type": "string"
},
{
"in": "body",
"name": "body",
"description": "The compute content",
"required": true,
"schema": {
"$ref": "#/definitions/models.compute"
}
}
],
"responses": {
"200": {
"description": "{workspace} models.workspace"
}
}
},
"delete": {
"tags": [
"oc-scheduler/controllersWorkflowSchedulerController"
],
"description": "schedule workflow\n\u003cbr\u003e",
"description": "unschedule a workflow execution: deletes its bookings on all peers then deletes the execution.\n\u003cbr\u003e",
"operationId": "WorkflowSchedulerController.UnSchedule",
"parameters": [
{
"in": "path",
"name": "id",
"description": "id execution",
"description": "execution id",
"required": true,
"type": "string"
},
{
"in": "body",
"name": "body",
"description": "The compute content",
"required": true,
"schema": {
"$ref": "#/definitions/models.compute"
}
}
],
"responses": {
"200": {
"description": "{workspace} models.workspace"
"description": "",
"schema": {
"$ref": "#/definitions/map[string]interface{}"
}
}
}
},
"/{id}/check": {
"get": {
"tags": [
"oc-scheduler/controllersWorkflowSchedulerController"
],
"description": "WebSocket stream of slot availability for a workflow.\n\u003cbr\u003e",
"operationId": "WorkflowSchedulerController.CheckStream",
"parameters": [
{
"in": "path",
"name": "id",
"description": "workflow id",
"required": true,
"type": "string"
},
{
"in": "query",
"name": "as_possible",
"description": "find nearest free slot from now",
"type": "boolean"
},
{
"in": "query",
"name": "preemption",
"description": "validate anyway, raise warnings",
"type": "boolean"
}
],
"responses": {
"101": {
"description": ""
}
}
}
},
"/{id}/order": {
@@ -410,6 +414,10 @@
}
},
"definitions": {
"map[string]interface{}": {
"title": "map[string]interface{}",
"type": "object"
},
"models.compute": {
"title": "compute",
"type": "object"
@@ -428,6 +436,10 @@
"name": "booking",
"description": "Operations about workspace\n"
},
{
"name": "verification",
"description": "Operations about workspace\n"
},
{
"name": "execution",
"description": "Operations about workflow\n"

View File

@@ -13,75 +13,24 @@ info:
basePath: /oc/
paths:
/{id}:
post:
tags:
- oc-scheduler/controllersWorkflowSchedulerController
description: |-
schedule workflow
<br>
operationId: WorkflowSchedulerController.Schedule
parameters:
- in: path
name: id
description: id execution
required: true
type: string
- in: body
name: body
description: The compute content
required: true
schema:
$ref: '#/definitions/models.compute'
responses:
"200":
description: '{workspace} models.workspace'
delete:
tags:
- oc-scheduler/controllersWorkflowSchedulerController
description: |-
schedule workflow
unschedule a workflow execution: deletes its bookings on all peers then deletes the execution.
<br>
operationId: WorkflowSchedulerController.UnSchedule
parameters:
- in: path
name: id
description: id execution
description: execution id
required: true
type: string
- in: body
name: body
description: The compute content
required: true
schema:
$ref: '#/definitions/models.compute'
responses:
"200":
description: '{workspace} models.workspace'
/{id}/check:
get:
tags:
- oc-scheduler/controllersWorkflowSchedulerController
description: |-
WebSocket stream of slot availability for a workflow.
<br>
operationId: WorkflowSchedulerController.CheckStream
parameters:
- in: path
name: id
description: workflow id
required: true
type: string
- in: query
name: as_possible
description: find nearest free slot from now
type: boolean
- in: query
name: preemption
description: validate anyway, raise warnings
type: boolean
responses:
"101":
description: ""
schema:
$ref: '#/definitions/map[string]interface{}'
/{id}/order:
get:
tags:
@@ -277,6 +226,61 @@ paths:
responses:
"200":
description: '{workspace} models.workspace'
/verification/:
get:
tags:
- verification
description: |-
find verification by id
<br>
operationId: ExecutionVerificationController.GetAll
parameters:
- in: query
name: is_draft
description: draft wished
type: string
responses:
"200":
description: '{booking} models.booking'
/verification/{id}:
get:
tags:
- verification
description: |-
find verification by id
<br>
operationId: ExecutionVerificationController.Get
parameters:
- in: path
name: id
description: the id you want to get
required: true
type: string
responses:
"200":
description: '{booking} models.booking'
put:
tags:
- verification
description: |-
create computes
<br>
operationId: ExecutionVerificationController.Update
parameters:
- in: path
name: id
description: the compute id you want to get
required: true
type: string
- in: body
name: body
description: The compute content
required: true
schema:
$ref: '#/definitions/models.compute'
responses:
"200":
description: '{compute} models.compute'
/version/:
get:
tags:
@@ -300,6 +304,9 @@ paths:
"200":
description: ""
definitions:
map[string]interface{}:
title: map[string]interface{}
type: object
models.compute:
title: compute
type: object
@@ -313,6 +320,9 @@ tags:
- name: booking
description: |
Operations about workspace
- name: verification
description: |
Operations about workspace
- name: execution
description: |
Operations about workflow

24
ws.go
View File

@@ -23,7 +23,7 @@ func main() {
// ws://localhost:8090/oc/<workflow-id>/check
// ws://localhost:8090/oc/<workflow-id>/check?as_possible=true
// ws://localhost:8090/oc/<workflow-id>/check?as_possible=true&preemption=true
url := "ws://localhost:8090/oc/WORKFLOW_ID/check?as_possible=true"
url := "ws://localhost:8090/oc/58314c99-c595-4ca2-8b5e-822a6774efed/check?as_possible=true"
token := ""
// Body JSON envoyé comme premier message WebSocket (WorkflowSchedule).
// Seuls start + duration_s sont requis si as_possible=true.
@@ -80,6 +80,14 @@ func main() {
}
}()
// After 10 seconds, simulate a date change from the front end (now + 3 min).
dateChangeTick := time.NewTimer(10 * time.Second)
defer dateChangeTick.Stop()
// Après 15 secondes, simule la confirmation du scheduling par le client.
confirmTick := time.NewTimer(15 * time.Second)
defer confirmTick.Stop()
idleTimer := time.NewTimer(time.Duration(*timeout) * time.Second)
defer idleTimer.Stop()
@@ -94,6 +102,20 @@ func main() {
case <-idleTimer.C:
fmt.Printf("Timeout (%ds) — aucun message reçu, fermeture.\n", *timeout)
return
case <-dateChangeTick.C:
newStart := time.Now().UTC().Add(3 * time.Minute)
update := `{"start":"` + newStart.Format(time.RFC3339) + `","duration_s":3600}`
fmt.Printf("\n[sim] Envoi mise à jour de date → %s\n\n", update)
if err := websocket.Message.Send(ws, update); err != nil {
fmt.Printf("Erreur envoi mise à jour : %v\n", err)
return
}
case <-confirmTick.C:
fmt.Println("\n[sim] Envoi confirmation du scheduling → {\"confirm\":true}\n")
if err := websocket.Message.Send(ws, `{"confirm":true}`); err != nil {
fmt.Printf("Erreur envoi confirmation : %v\n", err)
return
}
case raw := <-msgs:
idleTimer.Reset(time.Duration(*timeout) * time.Second)
var data any