package common

import (
	"context"
	"encoding/json"
	"sort"
	"time"

	oclib "cloud.o-forge.io/core/oc-lib"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/network"
	pp "github.com/libp2p/go-libp2p/core/peer"
)

// ProtocolIndexerCandidates is opened by a node toward its remaining indexers
// to request candidate replacement indexers after an ejection event.
const ProtocolIndexerCandidates = "/opencloud/indexer/candidates/1.0"

// IndexerCandidatesRequest is sent by a node to one of its indexers.
// Count is how many candidates are needed.
type IndexerCandidatesRequest struct {
	Count int `json:"count"`
}

// IndexerCandidatesResponse carries a random sample of known indexers from
// the responding indexer's DHT cache.
type IndexerCandidatesResponse struct {
	Candidates []pp.AddrInfo `json:"candidates"`
}

// TriggerConsensus asks each remaining indexer for a random pool of candidates,
// scores them asynchronously via a one-shot probe heartbeat, and admits the
// best ones to StaticIndexers. Falls back to DHT replenishment for any gap.
//
// Must be called in a goroutine — it blocks until all probes have returned
// (or timed out), which can take up to ~10s.
func TriggerConsensus(h host.Host, remaining []pp.AddrInfo, need int) {
	if need <= 0 || len(remaining) == 0 {
		return
	}
	logger := oclib.GetLogger()
	logger.Info().Int("voters", len(remaining)).Int("need", need).
		Msg("[consensus] starting indexer candidate consensus")

	// Phase 1 — collect candidates from all remaining indexers in parallel.
	// Every goroutine sends exactly one result (possibly empty) so the
	// receive loop below can drain a fixed number of messages without a
	// WaitGroup; the channel is buffered to len(remaining) so no sender
	// ever blocks even if this function were to return early.
	type collectResult struct{ candidates []pp.AddrInfo }
	collectCh := make(chan collectResult, len(remaining))
	for _, ai := range remaining {
		go func(ai pp.AddrInfo) {
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer cancel()
			s, err := h.NewStream(ctx, ai.ID, ProtocolIndexerCandidates)
			if err != nil {
				collectCh <- collectResult{}
				return
			}
			defer s.Close()
			// Bound both the request write and the response read; a stuck
			// voter must not delay the whole consensus round.
			s.SetDeadline(time.Now().Add(5 * time.Second))
			// Ask for a couple of spares beyond `need` so scoring has slack
			// to reject low-quality candidates and still fill the gap.
			if err := json.NewEncoder(s).Encode(IndexerCandidatesRequest{Count: need + 2}); err != nil {
				collectCh <- collectResult{}
				return
			}
			var resp IndexerCandidatesResponse
			if err := json.NewDecoder(s).Decode(&resp); err != nil {
				collectCh <- collectResult{}
				return
			}
			collectCh <- collectResult{candidates: resp.Candidates}
		}(ai)
	}

	// Merge and deduplicate, excluding indexers already in the pool.
	// Seeding with our own peer ID guarantees a node never admits itself
	// as its own indexer, even if a voter happens to return it.
	seen := map[pp.ID]struct{}{h.ID(): {}}
	for _, id := range Indexers.GetAddrIDs() {
		seen[id] = struct{}{}
	}
	var candidates []pp.AddrInfo
	for range remaining {
		r := <-collectCh
		for _, ai := range r.candidates {
			if _, dup := seen[ai.ID]; !dup {
				seen[ai.ID] = struct{}{}
				candidates = append(candidates, ai)
			}
		}
	}
	if len(candidates) == 0 {
		logger.Info().Msg("[consensus] no candidates from voters, falling back to DHT")
		replenishIndexersFromDHT(h, need)
		return
	}
	logger.Info().Int("candidates", len(candidates)).Msg("[consensus] scoring candidates")

	// Phase 2 — score all candidates in parallel via a one-shot probe heartbeat.
	// An unreachable candidate scores 0 and is filtered by the threshold below.
	type scoreResult struct {
		ai    pp.AddrInfo
		score float64
	}
	scoreCh := make(chan scoreResult, len(candidates))
	for _, ai := range candidates {
		go func(ai pp.AddrInfo) {
			resp, rtt, err := probeIndexer(h, ai)
			if err != nil {
				scoreCh <- scoreResult{ai: ai, score: 0}
				return
			}
			scoreCh <- scoreResult{ai: ai, score: quickScore(resp, rtt)}
		}(ai)
	}
	results := make([]scoreResult, 0, len(candidates))
	for range candidates {
		results = append(results, <-scoreCh)
	}

	// Sort descending by quick score, admit top `need` above the minimum bar.
	sort.Slice(results, func(i, j int) bool { return results[i].score > results[j].score })
	minQ := dynamicMinScore(0) // fresh peer: threshold starts at 20
	admitted := 0
	for _, res := range results {
		if admitted >= need {
			break
		}
		if res.score < minQ {
			break // sorted desc: everything after is worse
		}
		key := addrKey(res.ai)
		if Indexers.ExistsAddr(key) {
			continue // already in pool (race with heartbeat path)
		}
		// Copy before taking the address: res is reused each iteration.
		cpy := res.ai
		Indexers.SetAddr(key, &cpy)
		admitted++
	}
	if admitted > 0 {
		logger.Info().Int("admitted", admitted).Msg("[consensus] candidates admitted to pool")
		Indexers.NudgeIt()
	}

	// Fill any remaining gap with DHT discovery.
	if gap := need - admitted; gap > 0 {
		logger.Info().Int("gap", gap).Msg("[consensus] gap after consensus, falling back to DHT")
		replenishIndexersFromDHT(h, gap)
	}
}

// probeIndexer dials the candidate, sends one lightweight heartbeat, and
// returns the HeartbeatResponse (nil if the indexer doesn't support it) and RTT.
func probeIndexer(h host.Host, ai pp.AddrInfo) (*HeartbeatResponse, time.Duration, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second)
	defer cancel()
	if h.Network().Connectedness(ai.ID) != network.Connected {
		if err := h.Connect(ctx, ai); err != nil {
			return nil, 0, err
		}
	}
	s, err := h.NewStream(ctx, ai.ID, ProtocolHeartbeat)
	if err != nil {
		return nil, 0, err
	}
	defer s.Close()
	hb := Heartbeat{PeerID: h.ID().String(), Timestamp: time.Now().UTC().Unix()}
	s.SetWriteDeadline(time.Now().Add(3 * time.Second))
	if err := json.NewEncoder(s).Encode(hb); err != nil {
		return nil, 0, err
	}
	// Clear the write deadline before timing the read: RTT measurement
	// starts at sentAt, after the request has been flushed.
	s.SetWriteDeadline(time.Time{})
	sentAt := time.Now()
	s.SetReadDeadline(time.Now().Add(5 * time.Second))
	var resp HeartbeatResponse
	if err := json.NewDecoder(s).Decode(&resp); err != nil {
		// Indexer connected but no response: connection itself is the signal.
		return nil, time.Since(sentAt), nil
	}
	return &resp, time.Since(sentAt), nil
}

// quickScore computes a lightweight score [0,100] from a probe result.
// Uses only fill rate (inverse) and latency — the two signals available
// without a full heartbeat history.
//
// NOTE(review): fillScore goes negative if resp.FillRate > 1; presumably
// FillRate is always in [0,1] — confirm with the heartbeat producer.
func quickScore(resp *HeartbeatResponse, rtt time.Duration) float64 {
	maxRTT := BaseRoundTrip * 10
	latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
	if latencyScore < 0 {
		latencyScore = 0
	}
	if resp == nil {
		// Connection worked but no response (old indexer): moderate score.
		return latencyScore * 50
	}
	fillScore := 1.0 - resp.FillRate // prefer less-loaded indexers
	return (0.5*latencyScore + 0.5*fillScore) * 100
}