Scheduling Node

This commit is contained in:
mr
2026-03-17 11:58:27 +01:00
parent b9df0b2731
commit 7fbc077cb1
20 changed files with 2281 additions and 1504 deletions

View File

@@ -58,7 +58,6 @@ func (o *LokiController) GetLogs() {
path += "?query={" + strings.Join(query, ", ") + "}&start=" + start + "&end=" + end
resp, err := http.Get(config.GetConfig().LokiUrl + path) // CALL
fmt.Println(resp, path)
if err != nil {
o.Ctx.ResponseWriter.WriteHeader(422)
o.Data["json"] = map[string]string{"error": err.Error()}

265
controllers/sheduler.go Normal file
View File

@@ -0,0 +1,265 @@
package controllers
import (
"fmt"
"net/http"
"oc-scheduler/infrastructure"
"strings"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/dbs"
"cloud.o-forge.io/core/oc-lib/tools"
beego "github.com/beego/beego/v2/server/web"
"github.com/google/uuid"
gorillaws "github.com/gorilla/websocket"
)
// Package-level handles shared by the scheduler controller handlers.
var (
	// orderCollection is the oc-lib data enum for orders (oclib.ORDER); it is
	// passed to oclib.NewRequestAdmin when searching orders.
	orderCollection = oclib.LibDataEnum(oclib.ORDER)
	// logger is the logger returned by oclib.GetLogger at package init.
	logger = oclib.GetLogger()
)
// WorkflowSchedulerController groups the operations about workflow scheduling.
// It has no state of its own: it only embeds beego.Controller, which provides
// the request context (Ctx), the Data map and ServeJSON used by its methods.
type WorkflowSchedulerController struct {
beego.Controller
}
// wsUpgrader upgrades incoming HTTP requests to WebSocket connections; it is
// used by CheckStreamHandler.
// NOTE(review): CheckOrigin unconditionally returns true, so the Origin header
// is never validated and any web page can open this socket from a browser —
// confirm this is intentional (e.g. origin/auth enforced by an upstream proxy).
var wsUpgrader = gorillaws.Upgrader{
CheckOrigin: func(r *http.Request) bool { return true },
}
// CheckStreamHandler is the WebSocket handler for slot availability checking.
// It is invoked via the CheckStream controller method.
// Query params: as_possible=true, preemption=true
//
// The workflow ID is taken from the URL path (prefix "/oc/" and suffix
// "/check" stripped). Everything that can fail without a socket (workflow
// peers, local peer) is resolved before the upgrade so the client gets a
// plain HTTP error and nothing has to be cleaned up. After the upgrade the
// first client message is the WorkflowSchedule to check; the handler then:
//   - runs an initial check and creates the session drafts,
//   - re-checks whenever the client sends an updated schedule, a watched
//     planner changes, or the workflow itself changes,
//   - finalizes the session when the client sends a schedule with Confirm set.
//
// On exit the subscriptions and refresh ownership are released, and the
// session is cleaned up unless it was confirmed.
func CheckStreamHandler(w http.ResponseWriter, r *http.Request) {
	wfID := strings.TrimSuffix(
		strings.TrimPrefix(r.URL.Path, "/oc/"),
		"/check",
	)
	q := r.URL.Query()
	asap := q.Get("as_possible") == "true"
	preemption := q.Get("preemption") == "true"
	user, peerID, groups := oclib.ExtractTokenInfo(*r)
	req := &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Caller:   nil,
		Admin:    true,
	}

	// Resolve the peers concerned by this workflow before upgrading so an
	// unknown workflow is reported as a plain HTTP error.
	watchedPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req)
	fmt.Println("Here my watched peers involved in workflow", watchedPeers)
	if err != nil {
		http.Error(w, `{"code":404,"error":"`+err.Error()+`"}`, http.StatusNotFound)
		return
	}

	// Resolve the local peer before upgrading as well: failing here after the
	// upgrade would leave the socket, the subscriptions and the refresh
	// ownership without any cleanup. GetMySelf may also return (nil, nil), so
	// make sure there is an error to log.
	selfID, err := oclib.GetMySelf()
	if err != nil || selfID == nil {
		if err == nil {
			err = fmt.Errorf("local peer not found")
		}
		logger.Err(err).Msg(err.Error())
		http.Error(w, `{"code":500,"error":"unable to resolve local peer"}`, http.StatusInternalServerError)
		return
	}
	selfPeerID := selfID.PeerID

	conn, err := wsUpgrader.Upgrade(w, r, nil)
	if err != nil {
		// gorilla already wrote the error response.
		return
	}

	// The first client message carries the schedule parameters.
	var ws infrastructure.WorkflowSchedule
	if err := conn.ReadJSON(&ws); err != nil {
		conn.Close()
		return
	}

	plannerCh, plannerUnsub := infrastructure.SubscribePlannerUpdates(watchedPeers)
	wfCh, wfUnsub := infrastructure.SubscribeWorkflowUpdates(wfID)
	// executionsID identifies this check session; it is echoed to the client
	// as SchedulingID and used to track refresh ownership and session drafts.
	executionsID := uuid.New().String()
	ownedPeers := infrastructure.RequestPlannerRefresh(watchedPeers, executionsID)

	// scheduled=true once bookings/purchases/exec have been created for this session.
	scheduled := false
	confirmed := false
	defer func() {
		conn.Close()
		// Called through the variables (not bound at defer time) because
		// plannerUnsub and ownedPeers are updated when the workflow changes.
		plannerUnsub()
		wfUnsub()
		infrastructure.ReleaseRefreshOwnership(ownedPeers, executionsID)
		if !confirmed {
			infrastructure.CleanupSession(selfID, executionsID, selfID, req)
		}
	}()

	// pushCheck runs an availability check and sends the result to the client.
	// If reschedule=true and the slot is available, it also creates/updates
	// bookings, purchases and the execution draft for this session.
	pushCheck := func(reschedule bool) error {
		result, checkErr := ws.Check(wfID, asap, preemption, req)
		if checkErr != nil {
			return checkErr
		}
		if result.Available && reschedule {
			// Sync the resolved start/end back to ws so that UpsertSessionDrafts
			// creates bookings/purchases with the actual scheduled dates (not the
			// raw client value which may be zero or pre-asapBuffer).
			ws.Start = result.Start
			if result.End != nil {
				ws.End = result.End
			}
			ws.UpsertSessionDrafts(wfID, executionsID, selfID, req)
			scheduled = true
		}
		result.SchedulingID = executionsID
		return conn.WriteJSON(result)
	}

	// Initial check + schedule.
	if err := pushCheck(true); err != nil {
		return
	}

	// Reader goroutine: forwards client updates and signals closeCh when the
	// connection is closed or unreadable. It ends when ReadJSON fails, which
	// the deferred conn.Close() above guarantees once the handler returns.
	updateCh := make(chan infrastructure.WorkflowSchedule, 1)
	closeCh := make(chan struct{})
	go func() {
		defer close(closeCh)
		for {
			var updated infrastructure.WorkflowSchedule
			if err := conn.ReadJSON(&updated); err != nil {
				return
			}
			select {
			case updateCh <- updated:
			default:
				// Buffer full: drop the stale pending update. The drain must be
				// non-blocking because the consumer may have taken the item in
				// the meantime; a blocking receive would then hang forever.
				select {
				case <-updateCh:
				default:
				}
				// Cannot block: this goroutine is the only sender and the
				// single-slot buffer is empty at this point.
				updateCh <- updated
			}
		}
	}()

	for {
		select {
		case updated := <-updateCh:
			if updated.Confirm {
				// Confirm: flip bookings/purchases to IsDraft=false, then let
				// the considers mechanism transition exec to IsDraft=false.
				ws.UUID = executionsID
				_, _, _, schedErr := ws.Schedules(wfID, req)
				if schedErr != nil {
					_ = conn.WriteJSON(map[string]interface{}{
						"error": schedErr.Error(),
					})
					return
				}
				confirmed = true
				return
			}
			// NOTE(review): the session is cleaned up on every update, but drafts
			// are only recreated below when the dates changed (or nothing was
			// scheduled yet) — confirm this matches CleanupSession's semantics.
			infrastructure.CleanupSession(selfID, executionsID, selfID, req)
			// Detect whether the user changed the requested dates.
			datesChanged := !updated.Start.Equal(ws.Start) ||
				updated.DurationS != ws.DurationS ||
				(updated.End == nil) != (ws.End == nil) ||
				(updated.End != nil && ws.End != nil && !updated.End.Equal(*ws.End))
			ws = updated
			// Reschedule when dates changed or we haven't scheduled yet.
			if err := pushCheck(datesChanged || !scheduled); err != nil {
				return
			}
		case remotePeerID := <-plannerCh:
			if remotePeerID == selfPeerID {
				// Our own planner updated (caused by our local booking store).
				// Just resend the current availability result without rescheduling
				// to avoid an infinite loop.
				result, checkErr := ws.Check(wfID, asap, preemption, req)
				if checkErr == nil {
					result.SchedulingID = executionsID
					_ = conn.WriteJSON(result)
				}
				continue
			}
			// A remote peer's planner changed. Re-check; if our slot is now
			// taken and we were already scheduled, reschedule at the new slot.
			result, checkErr := ws.Check(wfID, asap, preemption, req)
			if checkErr != nil {
				return
			}
			if !result.Available && scheduled {
				// Move to the next free slot and reschedule.
				if result.NextSlot != nil {
					ws.Start = *result.NextSlot
				}
				if err := pushCheck(true); err != nil {
					return
				}
			} else {
				result.SchedulingID = executionsID
				_ = conn.WriteJSON(result)
			}
		case <-wfCh:
			// The workflow changed: refresh the peer list and re-subscribe so
			// the stream watches the current set of planners.
			if newPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req); err == nil {
				plannerUnsub()
				watchedPeers = newPeers
				plannerCh, plannerUnsub = infrastructure.SubscribePlannerUpdates(newPeers)
				newOwned := infrastructure.RequestPlannerRefresh(newPeers, executionsID)
				ownedPeers = append(ownedPeers, newOwned...)
			}
			if err := pushCheck(false); err != nil {
				return
			}
		case <-closeCh:
			return
		}
	}
}
// UnSchedule unschedules the execution identified by the :id path parameter
// by delegating to infrastructure.UnscheduleExecution, and answers with a JSON
// body of the form {"code": <int>, "error": <string>}:
//   - 200 and an empty error on success,
//   - 404 when UnscheduleExecution fails,
//   - 500 when the local peer cannot be resolved.
//
// @Title UnSchedule
// @Description unschedule a workflow execution: deletes its bookings on all peers then deletes the execution.
// @Param	id		path 	string	true		"execution id"
// @Success 200 {object} map[string]interface{}
// @router /:id [delete]
func (o *WorkflowSchedulerController) UnSchedule() {
	user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
	executionID := o.Ctx.Input.Param(":id")
	req := &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Admin:    true,
	}

	// reply stores the JSON envelope and writes the response.
	reply := func(code int, msg string) {
		o.Data["json"] = map[string]interface{}{"code": code, "error": msg}
		o.ServeJSON()
	}

	// The local peer is required by UnscheduleExecution; do not hand it a nil
	// value when the lookup fails.
	selfID, err := oclib.GetMySelf()
	if err != nil {
		reply(500, err.Error())
		return
	}
	if selfID == nil {
		reply(500, "local peer not found")
		return
	}

	if err := infrastructure.UnscheduleExecution(executionID, selfID, req); err != nil {
		reply(404, err.Error())
		return
	}
	reply(200, "")
}
// SearchScheduledDraftOrder returns, as JSON, the result of searching the
// order collection for entries whose workflow_id equals the :id path parameter
// and whose order_by equals the peer ID extracted from the request token.
// The search is issued through an admin request (oclib.NewRequestAdmin), not
// with the caller's own user/groups.
//
// @Title SearchScheduledDraftOrder
// @Description search the orders attached to a workflow that were placed by the calling peer
// @Param	id		path 	string	true		"id execution"
// @Success 200 {workspace} models.workspace
// @router /:id/order [get]
func (o *WorkflowSchedulerController) SearchScheduledDraftOrder() {
	_, callerPeer, _ := oclib.ExtractTokenInfo(*o.Ctx.Request)
	target := o.Ctx.Input.Param(":id")

	equals := dbs.EQUAL.String()
	criteria := map[string][]dbs.Filter{
		"workflow_id": {{Operator: equals, Value: target}},
		"order_by":    {{Operator: equals, Value: callerPeer}},
	}

	found := oclib.NewRequestAdmin(orderCollection, nil).Search(&dbs.Filters{And: criteria}, "", true)
	o.Data["json"] = found
	o.ServeJSON()
}

View File

@@ -1,270 +0,0 @@
package controllers
import (
"encoding/json"
"fmt"
"net/http"
"oc-scheduler/infrastructure"
"strings"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/dbs"
"cloud.o-forge.io/core/oc-lib/tools"
beego "github.com/beego/beego/v2/server/web"
"github.com/google/uuid"
gorillaws "github.com/gorilla/websocket"
)
// Package-level handles shared by the scheduler controller handlers.
var (
	// orderCollection is the oc-lib data enum for orders (oclib.ORDER); it is
	// passed to oclib.NewRequestAdmin when searching orders.
	orderCollection = oclib.LibDataEnum(oclib.ORDER)
	// logger is the logger returned by oclib.GetLogger at package init.
	logger = oclib.GetLogger()
)
// WorkflowSchedulerController groups the operations about workflow scheduling.
// It has no state of its own: it only embeds beego.Controller, which provides
// the request context (Ctx), the Data map and ServeJSON used by its methods.
type WorkflowSchedulerController struct {
beego.Controller
}
// Schedule decodes a WorkflowSchedule from the request body (at most 100000
// bytes are read), assigns it a fresh UUID and schedules it for the workflow
// identified by the :id path parameter. The JSON answer has the form
// {"data": ..., "code": <int>, "error": <string>}:
//   - 200 with the created workflow executions on success,
//   - 400 when the body is not a valid, non-null schedule,
//   - 409 when scheduling fails; executions already returned by Schedules are
//     deleted before answering.
//
// @Title Schedule
// @Description schedule workflow
// @Param	id		path 	string	true		"id execution"
// @Param	body		body 	models.compute	true		"The compute content"
// @Success 200 {workspace} models.workspace
// @router /:id [post]
func (o *WorkflowSchedulerController) Schedule() {
	logger := oclib.GetLogger()
	user, peerID, groups := oclib.ExtractTokenInfo(*o.Ctx.Request)
	wfID := o.Ctx.Input.Param(":id")

	// resp stays nil when the body is invalid or the JSON literal null; reject
	// both instead of dereferencing a nil schedule below.
	var resp *infrastructure.WorkflowSchedule
	if err := json.Unmarshal(o.Ctx.Input.CopyBody(100000), &resp); err != nil || resp == nil {
		reason := "empty schedule"
		if err != nil {
			reason = err.Error()
		}
		o.Data["json"] = map[string]interface{}{
			"data":  nil,
			"code":  400,
			"error": "Invalid schedule payload: " + reason,
		}
		o.ServeJSON()
		return
	}

	logger.Info().Msg("Booking for " + wfID)
	req := oclib.NewRequestAdmin(collection, nil)
	// req := oclib.NewRequest(collection, user, peerID, groups, caller)
	resp.UUID = uuid.New().String()
	sch, _, _, err := resp.Schedules(wfID, &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Caller:   nil,
		Admin:    true,
	})
	if err != nil {
		// Roll back whatever executions were created before the failure.
		if sch != nil {
			for _, w := range sch.WorkflowExecution {
				req.DeleteOne(w.GetID())
			}
		}
		o.Data["json"] = map[string]interface{}{
			"data":  nil,
			"code":  409,
			"error": "Error when scheduling your execution(s): " + err.Error(),
		}
		o.ServeJSON()
		return
	}
	o.Data["json"] = map[string]interface{}{
		"data":  sch.WorkflowExecution,
		"code":  200,
		"error": "",
	}
	o.ServeJSON()
}
// wsUpgrader upgrades incoming HTTP requests to WebSocket connections; it is
// used by CheckStreamHandler.
// NOTE(review): CheckOrigin unconditionally returns true, so the Origin header
// is never validated and any web page can open this socket from a browser —
// confirm this is intentional (e.g. origin/auth enforced by an upstream proxy).
var wsUpgrader = gorillaws.Upgrader{
CheckOrigin: func(r *http.Request) bool { return true },
}
// CheckStream is the beego entry point of the availability stream: it hands
// the raw response writer and request of the current context over to
// CheckStreamHandler, which performs the WebSocket upgrade and the streaming.
//
// @Title CheckStream
// @Description WebSocket stream for slot availability checking.
// @Param	id			path	string	true	"workflow id"
// @Param	as_possible	query	bool	false	"search from now"
// @Param	preemption	query	bool	false	"force-valid, surface warnings"
// @router /:id/check [get]
func (o *WorkflowSchedulerController) CheckStream() {
	ctx := o.Ctx
	CheckStreamHandler(ctx.ResponseWriter, ctx.Request)
}
// CheckStreamHandler is the WebSocket handler for slot availability checking.
// It is invoked via the CheckStream controller method.
// Query params: as_possible=true, preemption=true
//
// The workflow ID is taken from the URL path (prefix "/oc/" and suffix
// "/check" stripped). After the upgrade, the first client message is the
// WorkflowSchedule to check. The handler pushes a check result immediately,
// then again whenever the client sends an updated schedule, a watched planner
// changes, or the workflow itself changes. It returns when the client closes
// the connection or a check/write fails.
func CheckStreamHandler(w http.ResponseWriter, r *http.Request) {
	wfID := strings.TrimSuffix(
		strings.TrimPrefix(r.URL.Path, "/oc/"),
		"/check",
	)
	q := r.URL.Query()
	asap := q.Get("as_possible") == "true"
	preemption := q.Get("preemption") == "true"
	user, peerID, groups := oclib.ExtractTokenInfo(*r)
	req := &tools.APIRequest{
		Username: user,
		PeerID:   peerID,
		Groups:   groups,
		Caller:   nil,
		Admin:    true,
	}

	// Resolve the peer IDs concerned by this workflow before upgrading so we
	// can abort cleanly with a plain HTTP error if the workflow is not found.
	watchedPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req)
	fmt.Println("Here my watched peers involved in workflow", watchedPeers)
	if err != nil {
		http.Error(w, `{"code":404,"error":"`+err.Error()+`"}`, http.StatusNotFound)
		return
	}

	// Upgrade to WebSocket.
	conn, err := wsUpgrader.Upgrade(w, r, nil)
	if err != nil {
		// gorilla already wrote the error response
		return
	}

	// Read the schedule parameters sent by the client as the first message.
	var ws infrastructure.WorkflowSchedule
	if err := conn.ReadJSON(&ws); err != nil {
		conn.Close()
		return
	}

	// Subscribe to planner updates for the initially resolved peers and to
	// workflow change notifications (peer list may change on workflow edit).
	plannerCh, plannerUnsub := infrastructure.SubscribePlannerUpdates(watchedPeers)
	wfCh, wfUnsub := infrastructure.SubscribeWorkflowUpdates(wfID)

	// Unique ID for this check session — used to track refresh ownership.
	sessionID := uuid.New().String()

	// Request a fresh planner snapshot for every concerned peer.
	// The first session to claim a peer becomes its refresh owner; others skip
	// the duplicate PB_PLANNER emission. ownedPeers grows if the workflow's
	// peer list changes (wfCh).
	ownedPeers := infrastructure.RequestPlannerRefresh(watchedPeers, sessionID)

	// Cleanup on exit (clean or forced): release refresh ownership for the
	// peers this session claimed, which resets Refreshing state and emits
	// PB_CLOSE_PLANNER so oc-discovery stops the planner stream.
	defer func() {
		conn.Close()
		plannerUnsub()
		wfUnsub()
		infrastructure.ReleaseRefreshOwnership(ownedPeers, sessionID)
	}()

	// push runs an availability check with the current schedule and sends the
	// result to the client.
	push := func() error {
		result, checkErr := ws.Check(wfID, asap, preemption, req)
		if checkErr != nil {
			return checkErr
		}
		return conn.WriteJSON(result)
	}

	// Initial check.
	if err := push(); err != nil {
		return
	}

	// Read loop: detect client-side close and parse schedule parameter
	// updates (date changes, booking mode changes, …) sent by the client.
	// The goroutine ends when ReadJSON fails, which the deferred conn.Close()
	// above guarantees once the handler returns.
	updateCh := make(chan infrastructure.WorkflowSchedule, 1)
	closeCh := make(chan struct{})
	go func() {
		defer close(closeCh)
		for {
			var updated infrastructure.WorkflowSchedule
			if err := conn.ReadJSON(&updated); err != nil {
				// Connection closed or unrecoverable read error.
				return
			}
			select {
			case updateCh <- updated:
			default:
				// Drop the oldest pending update if the consumer hasn't caught
				// up. The drain must be non-blocking: the consumer may have
				// taken the item in the meantime, and a blocking receive would
				// then hang this goroutine forever.
				select {
				case <-updateCh:
				default:
				}
				// Cannot block: this goroutine is the only sender and the
				// single-slot buffer is empty at this point.
				updateCh <- updated
			}
		}
	}()

	// Stream loop.
	for {
		select {
		case updated := <-updateCh:
			// The client changed the requested date/params: adopt the new
			// schedule and re-run the check immediately.
			ws = updated
			if err := push(); err != nil {
				return
			}
		case <-wfCh:
			// The workflow was modified: refresh the peer list and re-subscribe
			// so the stream watches the correct set of planners going forward.
			if newPeers, err := infrastructure.GetWorkflowPeerIDs(wfID, req); err == nil {
				plannerUnsub()
				watchedPeers = newPeers
				plannerCh, plannerUnsub = infrastructure.SubscribePlannerUpdates(newPeers)
				// Claim refresh ownership for any newly added peers.
				newOwned := infrastructure.RequestPlannerRefresh(newPeers, sessionID)
				ownedPeers = append(ownedPeers, newOwned...)
			}
			if err := push(); err != nil {
				return
			}
		case <-plannerCh:
			// A planner snapshot arrived (or was evicted): re-evaluate.
			if err := push(); err != nil {
				return
			}
		case <-closeCh:
			return
		}
	}
}
// UnSchedule is the placeholder for the unschedule operation: nothing is
// deleted yet (see the TODO below). It currently answers with the result of an
// admin search on `collection` for entries whose workflow_id equals the :id
// path parameter.
//
// @Title UnSchedule
// @Description not implemented yet: returns the entries whose workflow_id matches the given id
// @Param	id		path 	string	true		"id execution"
// @Param	body		body 	models.compute	true		"The compute content"
// @Success 200 {workspace} models.workspace
// @router /:id [delete]
func (o *WorkflowSchedulerController) UnSchedule() {
	target := o.Ctx.Input.Param(":id")
	// TODO UNSCHEDULER
	byWorkflow := dbs.Filter{Operator: dbs.EQUAL.String(), Value: target}
	criteria := &dbs.Filters{
		And: map[string][]dbs.Filter{
			"workflow_id": {byWorkflow},
		},
	}
	// The search runs as admin; the caller's token is not consulted here.
	found := oclib.NewRequestAdmin(collection, nil).Search(criteria, "", true)
	o.Data["json"] = found
	o.ServeJSON()
}
// SearchScheduledDraftOrder returns, as JSON, the result of searching the
// order collection for entries whose workflow_id equals the :id path parameter
// and whose order_by equals the peer ID extracted from the request token.
// The search is issued through an admin request (oclib.NewRequestAdmin), not
// with the caller's own user/groups.
//
// @Title SearchScheduledDraftOrder
// @Description search the orders attached to a workflow that were placed by the calling peer
// @Param	id		path 	string	true		"id execution"
// @Success 200 {workspace} models.workspace
// @router /:id/order [get]
func (o *WorkflowSchedulerController) SearchScheduledDraftOrder() {
	// Only the peer ID is needed; the caller's identity is deliberately not
	// printed to stdout.
	_, peerID, _ := oclib.ExtractTokenInfo(*o.Ctx.Request)
	id := o.Ctx.Input.Param(":id")
	filter := &dbs.Filters{
		And: map[string][]dbs.Filter{
			"workflow_id": {{Operator: dbs.EQUAL.String(), Value: id}},
			"order_by":    {{Operator: dbs.EQUAL.String(), Value: peerID}},
		},
	}
	o.Data["json"] = oclib.NewRequestAdmin(orderCollection, nil).Search(filter, "", true)
	//o.Data["json"] = oclib.NewRequest(orderCollection, user, peerID, groups, nil).Search(filter, "", true)
	o.ServeJSON()
}