Workflow lifecycle events + resource instance duration tracking
- Add WorkflowLifecycleEvent + StepMetric to tools/workflow_lifecycle.go
- Add WORKFLOW_STARTED_EVENT, WORKFLOW_STEP_DONE_EVENT, WORKFLOW_DONE_EVENT NATS methods
- ResourceInstance.UpdateAverageDuration for AverageDurationS running average
- Support Steps recap in WORKFLOW_DONE_EVENT for catch-up by oc-scheduler/oc-catalog

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -30,6 +30,7 @@ var meths = []string{"remove execution", "create execution", "planner execution"
|
||||
"workflow event", "argo kube event", "create resource", "remove resource",
|
||||
"propalgation event", "search event", "confirm event",
|
||||
"considers event", "admiralty config event", "minio config event",
|
||||
"workflow started event", "workflow step done event", "workflow done event",
|
||||
}
|
||||
|
||||
const (
|
||||
@@ -52,6 +53,13 @@ const (
|
||||
CONSIDERS_EVENT
|
||||
ADMIRALTY_CONFIG_EVENT
|
||||
MINIO_CONFIG_EVENT
|
||||
|
||||
// Workflow lifecycle events emitted by oc-monitord.
|
||||
// oc-scheduler listens to STARTED and DONE to maintain WorkflowExecution state.
|
||||
// oc-datacenter listens to STEP_DONE and DONE to close bookings and tear down infra.
|
||||
WORKFLOW_STARTED_EVENT
|
||||
WORKFLOW_STEP_DONE_EVENT
|
||||
WORKFLOW_DONE_EVENT
|
||||
)
|
||||
|
||||
func (n NATSMethod) String() string {
|
||||
@@ -62,7 +70,8 @@ func (n NATSMethod) String() string {
|
||||
func NameToMethod(name string) NATSMethod {
|
||||
for _, v := range [...]NATSMethod{REMOVE_EXECUTION, CREATE_EXECUTION, PLANNER_EXECUTION, DISCOVERY, WORKFLOW_EVENT, ARGO_KUBE_EVENT,
|
||||
CREATE_RESOURCE, REMOVE_RESOURCE, PROPALGATION_EVENT, SEARCH_EVENT, CONFIRM_EVENT,
|
||||
CONSIDERS_EVENT, ADMIRALTY_CONFIG_EVENT, MINIO_CONFIG_EVENT} {
|
||||
CONSIDERS_EVENT, ADMIRALTY_CONFIG_EVENT, MINIO_CONFIG_EVENT,
|
||||
WORKFLOW_STARTED_EVENT, WORKFLOW_STEP_DONE_EVENT, WORKFLOW_DONE_EVENT} {
|
||||
if strings.Contains(strings.ToLower(v.String()), strings.ToLower(name)) {
|
||||
return v
|
||||
}
|
||||
|
||||
33
tools/workflow_lifecycle.go
Normal file
33
tools/workflow_lifecycle.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package tools
|
||||
|
||||
import "time"
|
||||
|
||||
// StepMetric carries the outcome of one Argo step node as observed by oc-monitord.
|
||||
// Embedded in WorkflowLifecycleEvent.Steps for the WORKFLOW_DONE_EVENT recap.
|
||||
type StepMetric struct {
|
||||
BookingID string `json:"booking_id"` // no omitempty: the "booking_id" key is always emitted, even when the string is empty
|
||||
State int `json:"state"` // integer state code, always emitted; NOTE(review): presumably the same enum index as WorkflowLifecycleEvent.State — confirm
|
||||
RealStart *time.Time `json:"real_start,omitempty"` // pointer + omitempty: the key is dropped from the JSON when nil
|
||||
RealEnd *time.Time `json:"real_end,omitempty"` // pointer + omitempty: the key is dropped from the JSON when nil
|
||||
}
|
||||
|
||||
// WorkflowLifecycleEvent is the NATS payload emitted by oc-monitord on
|
||||
// WORKFLOW_STARTED_EVENT, WORKFLOW_STEP_DONE_EVENT, and WORKFLOW_DONE_EVENT.
|
||||
//
|
||||
// - ExecutionID : WorkflowExecution UUID (used by oc-scheduler to update state)
|
||||
// - ExecutionsID : run-group ID shared by all bookings of the same run
|
||||
// - BookingID : non-empty only for WORKFLOW_STEP_DONE_EVENT
|
||||
// - State : target state (enum index: SUCCESS=3, FAILURE=4, STARTED=2, …)
|
||||
// - RealStart : actual start timestamp recorded by Argo (nil if unknown)
|
||||
// - RealEnd : actual end timestamp recorded by Argo (nil for STARTED events)
|
||||
// - Steps : non-nil only for WORKFLOW_DONE_EVENT — full recap of every step
|
||||
// so oc-scheduler and oc-catalog can catch up if they missed STEP_DONE events
|
||||
type WorkflowLifecycleEvent struct {
|
||||
ExecutionID string `json:"execution_id"` // no omitempty: always present in the JSON payload
|
||||
ExecutionsID string `json:"executions_id"` // no omitempty: always present in the JSON payload
|
||||
BookingID string `json:"booking_id,omitempty"` // omitempty: the key is dropped when the string is empty
|
||||
State int `json:"state"` // no omitempty: emitted even when the value is 0
|
||||
RealStart *time.Time `json:"real_start,omitempty"` // pointer + omitempty: the key is dropped when nil
|
||||
RealEnd *time.Time `json:"real_end,omitempty"` // pointer + omitempty: the key is dropped when nil
|
||||
Steps []StepMetric `json:"steps,omitempty"` // omitempty: dropped when nil or zero-length; NOTE(review): the NATS method comments name oc-datacenter (not oc-catalog) as the STEP_DONE consumer — confirm which services rely on this recap
|
||||
}
|
||||
Reference in New Issue
Block a user