Simple Architecture
This commit is contained in:
199
daemons/node/common/consensus.go
Normal file
199
daemons/node/common/consensus.go
Normal file
@@ -0,0 +1,199 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
"github.com/libp2p/go-libp2p/core/network"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
// ProtocolIndexerCandidates is opened by a node toward its remaining indexers
// to request candidate replacement indexers after an ejection event.
// The stream carries one JSON-encoded IndexerCandidatesRequest followed by
// one IndexerCandidatesResponse (see TriggerConsensus).
const ProtocolIndexerCandidates = "/opencloud/indexer/candidates/1.0"
|
||||
|
||||
// IndexerCandidatesRequest is sent by a node to one of its indexers.
// Count is how many candidates are needed.
type IndexerCandidatesRequest struct {
	// Count is the number of candidate indexers the requester wants back.
	// Responders may return fewer; TriggerConsensus requests need+2 as slack.
	Count int `json:"count"`
}
|
||||
|
||||
// IndexerCandidatesResponse carries a random sample of known indexers from
// the responding indexer's DHT cache.
type IndexerCandidatesResponse struct {
	// Candidates are indexer address records; the requesting side filters out
	// duplicates and entries already present in its local indexer pool.
	Candidates []pp.AddrInfo `json:"candidates"`
}
|
||||
|
||||
// TriggerConsensus asks each remaining indexer for a random pool of candidates,
|
||||
// scores them asynchronously via a one-shot probe heartbeat, and admits the
|
||||
// best ones to StaticIndexers. Falls back to DHT replenishment for any gap.
|
||||
//
|
||||
// Must be called in a goroutine — it blocks until all probes have returned
|
||||
// (or timed out), which can take up to ~10s.
|
||||
func TriggerConsensus(h host.Host, remaining []pp.AddrInfo, need int) {
|
||||
if need <= 0 || len(remaining) == 0 {
|
||||
return
|
||||
}
|
||||
logger := oclib.GetLogger()
|
||||
logger.Info().Int("voters", len(remaining)).Int("need", need).
|
||||
Msg("[consensus] starting indexer candidate consensus")
|
||||
|
||||
// Phase 1 — collect candidates from all remaining indexers in parallel.
|
||||
type collectResult struct{ candidates []pp.AddrInfo }
|
||||
collectCh := make(chan collectResult, len(remaining))
|
||||
for _, ai := range remaining {
|
||||
go func(ai pp.AddrInfo) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
s, err := h.NewStream(ctx, ai.ID, ProtocolIndexerCandidates)
|
||||
if err != nil {
|
||||
collectCh <- collectResult{}
|
||||
return
|
||||
}
|
||||
defer s.Close()
|
||||
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||
if err := json.NewEncoder(s).Encode(IndexerCandidatesRequest{Count: need + 2}); err != nil {
|
||||
collectCh <- collectResult{}
|
||||
return
|
||||
}
|
||||
var resp IndexerCandidatesResponse
|
||||
if err := json.NewDecoder(s).Decode(&resp); err != nil {
|
||||
collectCh <- collectResult{}
|
||||
return
|
||||
}
|
||||
collectCh <- collectResult{candidates: resp.Candidates}
|
||||
}(ai)
|
||||
}
|
||||
|
||||
// Merge and deduplicate, excluding indexers already in the pool.
|
||||
seen := map[pp.ID]struct{}{}
|
||||
for _, ai := range Indexers.GetAddrIDs() {
|
||||
seen[ai] = struct{}{}
|
||||
|
||||
}
|
||||
var candidates []pp.AddrInfo
|
||||
for range remaining {
|
||||
r := <-collectCh
|
||||
for _, ai := range r.candidates {
|
||||
if _, dup := seen[ai.ID]; !dup {
|
||||
seen[ai.ID] = struct{}{}
|
||||
candidates = append(candidates, ai)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) == 0 {
|
||||
logger.Info().Msg("[consensus] no candidates from voters, falling back to DHT")
|
||||
replenishIndexersFromDHT(h, need)
|
||||
return
|
||||
}
|
||||
logger.Info().Int("candidates", len(candidates)).Msg("[consensus] scoring candidates")
|
||||
|
||||
// Phase 2 — score all candidates in parallel via a one-shot probe heartbeat.
|
||||
type scoreResult struct {
|
||||
ai pp.AddrInfo
|
||||
score float64
|
||||
}
|
||||
scoreCh := make(chan scoreResult, len(candidates))
|
||||
for _, ai := range candidates {
|
||||
go func(ai pp.AddrInfo) {
|
||||
resp, rtt, err := probeIndexer(h, ai)
|
||||
if err != nil {
|
||||
scoreCh <- scoreResult{ai: ai, score: 0}
|
||||
return
|
||||
}
|
||||
scoreCh <- scoreResult{ai: ai, score: quickScore(resp, rtt)}
|
||||
}(ai)
|
||||
}
|
||||
|
||||
results := make([]scoreResult, 0, len(candidates))
|
||||
for range candidates {
|
||||
results = append(results, <-scoreCh)
|
||||
}
|
||||
|
||||
// Sort descending by quick score, admit top `need` above the minimum bar.
|
||||
sort.Slice(results, func(i, j int) bool { return results[i].score > results[j].score })
|
||||
minQ := dynamicMinScore(0) // fresh peer: threshold starts at 20
|
||||
|
||||
admitted := 0
|
||||
for _, res := range results {
|
||||
if admitted >= need {
|
||||
break
|
||||
}
|
||||
if res.score < minQ {
|
||||
break // sorted desc: everything after is worse
|
||||
}
|
||||
key := addrKey(res.ai)
|
||||
if Indexers.ExistsAddr(key) {
|
||||
continue // already in pool (race with heartbeat path)
|
||||
}
|
||||
cpy := res.ai
|
||||
Indexers.SetAddr(key, &cpy)
|
||||
admitted++
|
||||
}
|
||||
|
||||
if admitted > 0 {
|
||||
logger.Info().Int("admitted", admitted).Msg("[consensus] candidates admitted to pool")
|
||||
Indexers.NudgeIt()
|
||||
}
|
||||
|
||||
// Fill any remaining gap with DHT discovery.
|
||||
if gap := need - admitted; gap > 0 {
|
||||
logger.Info().Int("gap", gap).Msg("[consensus] gap after consensus, falling back to DHT")
|
||||
replenishIndexersFromDHT(h, gap)
|
||||
}
|
||||
}
|
||||
|
||||
// probeIndexer dials the candidate, sends one lightweight heartbeat, and
|
||||
// returns the HeartbeatResponse (nil if the indexer doesn't support it) and RTT.
|
||||
func probeIndexer(h host.Host, ai pp.AddrInfo) (*HeartbeatResponse, time.Duration, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second)
|
||||
defer cancel()
|
||||
if h.Network().Connectedness(ai.ID) != network.Connected {
|
||||
if err := h.Connect(ctx, ai); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
s, err := h.NewStream(ctx, ai.ID, ProtocolHeartbeat)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer s.Close()
|
||||
|
||||
hb := Heartbeat{PeerID: h.ID().String(), Timestamp: time.Now().UTC().Unix()}
|
||||
s.SetWriteDeadline(time.Now().Add(3 * time.Second))
|
||||
if err := json.NewEncoder(s).Encode(hb); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
s.SetWriteDeadline(time.Time{})
|
||||
|
||||
sentAt := time.Now()
|
||||
s.SetReadDeadline(time.Now().Add(5 * time.Second))
|
||||
var resp HeartbeatResponse
|
||||
if err := json.NewDecoder(s).Decode(&resp); err != nil {
|
||||
// Indexer connected but no response: connection itself is the signal.
|
||||
return nil, time.Since(sentAt), nil
|
||||
}
|
||||
return &resp, time.Since(sentAt), nil
|
||||
}
|
||||
|
||||
// quickScore computes a lightweight score [0,100] from a probe result.
|
||||
// Uses only fill rate (inverse) and latency — the two signals available
|
||||
// without a full heartbeat history.
|
||||
func quickScore(resp *HeartbeatResponse, rtt time.Duration) float64 {
|
||||
maxRTT := BaseRoundTrip * 10
|
||||
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
|
||||
if latencyScore < 0 {
|
||||
latencyScore = 0
|
||||
}
|
||||
if resp == nil {
|
||||
// Connection worked but no response (old indexer): moderate score.
|
||||
return latencyScore * 50
|
||||
}
|
||||
fillScore := 1.0 - resp.FillRate // prefer less-loaded indexers
|
||||
return (0.5*latencyScore + 0.5*fillScore) * 100
|
||||
}
|
||||
Reference in New Issue
Block a user