113 lines
3.2 KiB
Go
113 lines
3.2 KiB
Go
package infrastructure
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
oclib "cloud.o-forge.io/core/oc-lib"
|
|
"cloud.o-forge.io/core/oc-lib/dbs"
|
|
bookingmodel "cloud.o-forge.io/core/oc-lib/models/booking"
|
|
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
|
"cloud.o-forge.io/core/oc-lib/tools"
|
|
"go.mongodb.org/mongo-driver/bson/primitive"
|
|
)
|
|
|
|
// processedBookings tracks booking IDs already handled this process lifetime.
// It is keyed by booking ID (the value of Booking.GetID) with an empty struct
// as value, i.e. it is used as a concurrent set. Being a package-level
// variable it lives only in memory, so it starts empty on every restart.
var processedBookings sync.Map
|
|
|
|
// closingStates is the set of terminal booking states.
|
|
var ClosingStates = map[enum.BookingStatus]bool{
|
|
enum.FAILURE: true,
|
|
enum.SUCCESS: true,
|
|
enum.FORGOTTEN: true,
|
|
enum.CANCELLED: true,
|
|
}
|
|
|
|
// WatchBookings is a safety-net fallback for when oc-monitord fails to launch.
|
|
// It detects bookings that are past expected_start_date by at least 1 minute and
|
|
// are still in a non-terminal state. Instead of writing to the database directly,
|
|
// it emits WORKFLOW_STEP_DONE_EVENT with State=FAILURE on NATS so that oc-scheduler
|
|
// handles the state transition — keeping a single source of truth for booking state.
|
|
//
|
|
// Must be launched in a goroutine from main.
|
|
func WatchBookings() {
|
|
logger := oclib.GetLogger()
|
|
logger.Info().Msg("BookingWatchdog: started")
|
|
ticker := time.NewTicker(time.Minute)
|
|
defer ticker.Stop()
|
|
for range ticker.C {
|
|
if err := scanStaleBookings(); err != nil {
|
|
logger.Error().Msg("BookingWatchdog: " + err.Error())
|
|
}
|
|
}
|
|
}
|
|
|
|
// scanStaleBookings queries all bookings whose ExpectedStartDate passed more than
|
|
// 1 minute ago. Non-terminal ones get a WORKFLOW_STEP_DONE_EVENT FAILURE emitted
|
|
// on NATS so oc-scheduler closes them.
|
|
func scanStaleBookings() error {
|
|
myself, err := oclib.GetMySelf()
|
|
if err != nil {
|
|
return fmt.Errorf("could not resolve local peer: %w", err)
|
|
}
|
|
peerID := myself.GetID()
|
|
|
|
deadline := time.Now().UTC().Add(-time.Minute)
|
|
res := oclib.NewRequest(oclib.LibDataEnum(oclib.BOOKING), "", peerID, []string{}, nil).
|
|
Search(&dbs.Filters{
|
|
And: map[string][]dbs.Filter{
|
|
"expected_start_date": {{
|
|
Operator: dbs.LTE.String(),
|
|
Value: primitive.NewDateTimeFromTime(deadline),
|
|
}},
|
|
},
|
|
}, "", false)
|
|
|
|
if res.Err != "" {
|
|
return fmt.Errorf("stale booking search failed: %s", res.Err)
|
|
}
|
|
|
|
for _, dbo := range res.Data {
|
|
b, ok := dbo.(*bookingmodel.Booking)
|
|
if !ok {
|
|
continue
|
|
}
|
|
go emitWatchdogFailure(b)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// emitWatchdogFailure publishes a WORKFLOW_STEP_DONE_EVENT FAILURE for a stale
|
|
// booking. oc-scheduler is the single authority for booking state transitions.
|
|
func emitWatchdogFailure(b *bookingmodel.Booking) {
|
|
logger := oclib.GetLogger()
|
|
|
|
if _, done := processedBookings.Load(b.GetID()); done {
|
|
return
|
|
}
|
|
if ClosingStates[b.State] {
|
|
processedBookings.Store(b.GetID(), struct{}{})
|
|
return
|
|
}
|
|
|
|
now := time.Now().UTC()
|
|
payload, err := json.Marshal(tools.WorkflowLifecycleEvent{
|
|
BookingID: b.GetID(),
|
|
State: enum.FAILURE.EnumIndex(),
|
|
RealEnd: &now,
|
|
})
|
|
if err != nil {
|
|
return
|
|
}
|
|
tools.NewNATSCaller().SetNATSPub(tools.WORKFLOW_STEP_DONE_EVENT, tools.NATSResponse{
|
|
FromApp: "oc-datacenter",
|
|
Method: int(tools.WORKFLOW_STEP_DONE_EVENT),
|
|
Payload: payload,
|
|
})
|
|
|
|
logger.Info().Msgf("BookingWatchdog: booking %s stale → emitting FAILURE", b.GetID())
|
|
processedBookings.Store(b.GetID(), struct{}{})
|
|
}
|