Simple Architecture
This commit is contained in:
295
daemons/node/common/common_heartbeat.go
Normal file
295
daemons/node/common/common_heartbeat.go
Normal file
@@ -0,0 +1,295 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
"github.com/libp2p/go-libp2p/core/network"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
)
|
||||
|
||||
type Heartbeat struct {
|
||||
Name string `json:"name"`
|
||||
Stream *Stream `json:"stream"`
|
||||
DID string `json:"did"`
|
||||
PeerID string `json:"peer_id"`
|
||||
Timestamp int64 `json:"timestamp"`
|
||||
IndexersBinded []string `json:"indexers_binded"`
|
||||
Score float64
|
||||
// Record carries a fresh signed PeerRecord (JSON) so the receiving indexer
|
||||
// can republish it to the DHT without an extra round-trip.
|
||||
// Only set by nodes (not indexers heartbeating other indexers).
|
||||
Record json.RawMessage `json:"record,omitempty"`
|
||||
// Need is how many more indexers this node wants (MaxIndexer - current pool size).
|
||||
// The receiving indexer uses this to know how many suggestions to return.
|
||||
// 0 means the pool is full — no suggestions needed unless SuggestMigrate.
|
||||
Need int `json:"need,omitempty"`
|
||||
// Challenges is a list of PeerIDs the node asks the indexer to spot-check.
|
||||
// Always includes the node's own PeerID (ground truth) + up to 2 additional
|
||||
// known peers. Nil means no challenge this tick.
|
||||
Challenges []string `json:"challenges,omitempty"`
|
||||
// ChallengeDID asks the indexer to retrieve this DID from the DHT (every 5th batch).
|
||||
ChallengeDID string `json:"challenge_did,omitempty"`
|
||||
// Referent marks this indexer as the node's designated search referent.
|
||||
// Only one indexer per node receives Referent=true at a time (the best-scored one).
|
||||
// The indexer stores the node in its referencedNodes for distributed search.
|
||||
Referent bool `json:"referent,omitempty"`
|
||||
}
|
||||
|
||||
// SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer.
// The indexer broadcasts it on the GossipSub search mesh and streams results back.
//
// QueryID identifies the query; it is echoed in SearchQuery and SearchPeerResult.
type SearchPeerRequest struct {
	QueryID string `json:"query_id"`

	// Search criteria: at least one of PeerID, DID, Name must be set.
	// Unset criteria are omitted from the JSON encoding.
	PeerID string `json:"peer_id,omitempty"`
	DID    string `json:"did,omitempty"`
	Name   string `json:"name,omitempty"`
}
|
||||
|
||||
// SearchQuery is broadcast on TopicSearchPeer by the receiving indexer.
// It repeats the criteria of the originating SearchPeerRequest and adds the
// emitter's identity.
type SearchQuery struct {
	QueryID string `json:"query_id"`
	PeerID  string `json:"peer_id,omitempty"`
	DID     string `json:"did,omitempty"`
	Name    string `json:"name,omitempty"`

	// EmitterID is the indexer's own PeerID — responding indexers open a
	// ProtocolSearchPeerResponse stream back to it.
	EmitterID string `json:"emitter_id"`
}
|
||||
|
||||
// SearchPeerResult is sent by a responding indexer to the emitting indexer
|
||||
// via ProtocolSearchPeerResponse, and forwarded by the emitting indexer to
|
||||
// the node on the open ProtocolSearchPeer stream.
|
||||
type SearchPeerResult struct {
|
||||
QueryID string `json:"query_id"`
|
||||
Records []SearchHit `json:"records"`
|
||||
}
|
||||
|
||||
// SearchHit is a single peer found during distributed search.
// All three fields are always present in the JSON encoding, even when empty.
type SearchHit struct {
	PeerID string `json:"peer_id"`
	DID    string `json:"did"`
	Name   string `json:"name"`
}
|
||||
|
||||
// ChallengeEntry is the indexer's raw answer for one challenged peer.
//
// Note: encoding/json's omitempty never omits a struct value, so a zero
// LastSeen is still emitted (as "0001-01-01T00:00:00Z"). The tag is kept
// unchanged to preserve the wire format.
type ChallengeEntry struct {
	PeerID   string    `json:"peer_id"`
	Found    bool      `json:"found"`
	LastSeen time.Time `json:"last_seen,omitempty"` // zero if not found
}
|
||||
|
||||
// HeartbeatResponse carries raw metrics only — no pre-cooked score.
|
||||
type HeartbeatResponse struct {
|
||||
FillRate float64 `json:"fill_rate"`
|
||||
PeerCount int `json:"peer_count"`
|
||||
MaxNodes int `json:"max_nodes"` // capacity — lets node cross-check fillRate
|
||||
BornAt time.Time `json:"born_at"`
|
||||
Challenges []ChallengeEntry `json:"challenges,omitempty"`
|
||||
// DHTFound / DHTPayload: response to a ChallengeDID request.
|
||||
DHTFound bool `json:"dht_found,omitempty"`
|
||||
DHTPayload json.RawMessage `json:"dht_payload,omitempty"`
|
||||
// Witnesses: random sample of connected nodes so the querying node can cross-check.
|
||||
Witnesses []pp.AddrInfo `json:"witnesses,omitempty"`
|
||||
// Suggestions: better indexers this indexer knows about via its DHT cache.
|
||||
// The node should open heartbeat connections to these (they become StaticIndexers).
|
||||
Suggestions []pp.AddrInfo `json:"suggestions,omitempty"`
|
||||
// SuggestMigrate: set when this indexer is overloaded (fill rate > threshold)
|
||||
// and is actively trying to hand the node off to the Suggestions list.
|
||||
// Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives.
|
||||
// Non-seeds: node removes this indexer immediately if it has enough alternatives.
|
||||
SuggestMigrate bool `json:"suggest_migrate,omitempty"`
|
||||
}
|
||||
|
||||
// ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer.
|
||||
// - uptimeRatio: fraction of tracked lifetime online (gap-aware) — peer reliability
|
||||
// - bpms: bandwidth normalized to MaxExpectedMbps — link capacity
|
||||
// - diversity: indexer's own /24 subnet diversity — network topology quality
|
||||
// - latencyScore: 1 - RTT/maxRoundTrip — link responsiveness
|
||||
// - fillRate: fraction of indexer slots used (0=empty, 1=full) — collective trust signal:
|
||||
// a fuller indexer has been chosen and retained by many peers, which is evidence of quality.
|
||||
func (hb *Heartbeat) ComputeIndexerScore(uptimeRatio float64, bpms float64, diversity float64, latencyScore float64, fillRate float64) {
|
||||
hb.Score = ((0.20 * uptimeRatio) +
|
||||
(0.20 * bpms) +
|
||||
(0.20 * diversity) +
|
||||
(0.15 * latencyScore) +
|
||||
(0.25 * fillRate)) * 100
|
||||
}
|
||||
|
||||
// HeartbeatInfo is a list of opaque byte payloads. Each element is encoded as
// a JSON object with a single "info" key (base64, as encoding/json does for
// []byte).
type HeartbeatInfo []struct {
	Info []byte `json:"info"`
}
|
||||
|
||||
// WitnessRequest is sent by a node to a peer to ask its view of a given indexer.
type WitnessRequest struct {
	// IndexerPeerID is the string form of the indexer's peer ID.
	IndexerPeerID string `json:"indexer_peer_id"`
}
|
||||
|
||||
// WitnessReport is returned by a peer in response to a WitnessRequest.
// Seen is false when the peer has no score recorded for the requested
// indexer; the remaining fields are then left at their zero values.
//
// Note: encoding/json's omitempty never omits a struct value, so a zero
// BornAt is still emitted (as "0001-01-01T00:00:00Z"). The tag is kept
// unchanged to preserve the wire format.
type WitnessReport struct {
	Seen     bool      `json:"seen"`
	BornAt   time.Time `json:"born_at,omitempty"`
	FillRate float64   `json:"fill_rate,omitempty"`
	Score    float64   `json:"score,omitempty"`
}
|
||||
|
||||
// HandleBandwidthProbe echoes back everything written on the stream, then closes.
|
||||
// It is registered by all participants so the measuring side (the heartbeat receiver)
|
||||
// can open a dedicated probe stream and read the round-trip latency + throughput.
|
||||
func HandleBandwidthProbe(s network.Stream) {
|
||||
defer s.Close()
|
||||
s.SetDeadline(time.Now().Add(10 * time.Second))
|
||||
io.Copy(s, s) // echo every byte back to the sender
|
||||
}
|
||||
|
||||
// HandleWitnessQuery answers a witness query: the caller wants to know
|
||||
// what this node thinks of a given indexer (identified by its PeerID).
|
||||
func HandleWitnessQuery(h host.Host, s network.Stream) {
|
||||
defer s.Close()
|
||||
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||
var req WitnessRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
return
|
||||
}
|
||||
report := WitnessReport{}
|
||||
for _, ai := range Indexers.GetAddrs() {
|
||||
if ai.Info == nil || ai.Info.ID.String() != req.IndexerPeerID {
|
||||
continue
|
||||
}
|
||||
if score := Indexers.GetScore(addrKey(*ai.Info)); score != nil {
|
||||
report.Seen = true
|
||||
report.BornAt = score.LastBornAt
|
||||
report.FillRate = score.LastFillRate
|
||||
report.Score = score.Score
|
||||
}
|
||||
break
|
||||
}
|
||||
json.NewEncoder(s).Encode(report)
|
||||
}
|
||||
|
||||
// IndirectProbeIndexer asks each witness in the cache whether it still sees
|
||||
// the given indexer (by PeerID). Returns true if at least one witness confirms
|
||||
// it is alive — meaning our direct link is asymmetrically broken, not the indexer.
|
||||
// All probes run in parallel; the function blocks at most 5 seconds.
|
||||
func IndirectProbeIndexer(h host.Host, indexerPeerID string, pool []WitnessCacheEntry) bool {
|
||||
if len(pool) == 0 {
|
||||
return false
|
||||
}
|
||||
results := make(chan bool, len(pool))
|
||||
for _, e := range pool {
|
||||
go func(ai pp.AddrInfo) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
s, err := h.NewStream(ctx, ai.ID, ProtocolWitnessQuery)
|
||||
if err != nil {
|
||||
results <- false
|
||||
return
|
||||
}
|
||||
defer s.Reset()
|
||||
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||
if err := json.NewEncoder(s).Encode(WitnessRequest{IndexerPeerID: indexerPeerID}); err != nil {
|
||||
results <- false
|
||||
return
|
||||
}
|
||||
var rep WitnessReport
|
||||
if err := json.NewDecoder(s).Decode(&rep); err != nil {
|
||||
results <- false
|
||||
return
|
||||
}
|
||||
results <- rep.Seen
|
||||
}(e.AI)
|
||||
}
|
||||
for range pool {
|
||||
if <-results {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// SupportsHeartbeat probes pid with a short-lived stream to verify it has
|
||||
// a ProtocolHeartbeat handler (i.e. it is an indexer, not a plain node).
|
||||
// Only protocol negotiation is performed — no data is sent.
|
||||
// Returns false on any error, including "protocol not supported".
|
||||
func SupportsHeartbeat(h host.Host, pid pp.ID) bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
s, err := h.NewStream(ctx, pid, ProtocolHeartbeat)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
s.Reset()
|
||||
return true
|
||||
}
|
||||
|
||||
// queryWitnesses contacts each witness in parallel, collects their view of the
|
||||
// indexer, and updates score.witnessChecked / score.witnessConsistent.
|
||||
// Called in a goroutine — must not hold any lock.
|
||||
func queryWitnesses(h host.Host, indexerPeerID string, indexerBornAt time.Time, indexerFillRate float64, witnesses []pp.AddrInfo, score *Score) {
|
||||
logger := oclib.GetLogger()
|
||||
type result struct{ consistent bool }
|
||||
results := make(chan result, len(witnesses))
|
||||
|
||||
for _, ai := range witnesses {
|
||||
if ai.ID == h.ID() {
|
||||
// Never query ourselves — skip and count as inconclusive.
|
||||
results <- result{}
|
||||
continue
|
||||
}
|
||||
go func(ai pp.AddrInfo) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
s, err := h.NewStream(ctx, ai.ID, ProtocolWitnessQuery)
|
||||
if err != nil {
|
||||
results <- result{}
|
||||
return
|
||||
}
|
||||
defer s.Close()
|
||||
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||
if err := json.NewEncoder(s).Encode(WitnessRequest{IndexerPeerID: indexerPeerID}); err != nil {
|
||||
results <- result{}
|
||||
return
|
||||
}
|
||||
var rep WitnessReport
|
||||
if err := json.NewDecoder(s).Decode(&rep); err != nil || !rep.Seen {
|
||||
results <- result{}
|
||||
return
|
||||
}
|
||||
// BornAt must be identical (fixed timestamp).
|
||||
bornAtOK := !rep.BornAt.IsZero() && rep.BornAt.Equal(indexerBornAt)
|
||||
// FillRate coherent within ±25% (it fluctuates normally).
|
||||
diff := rep.FillRate - indexerFillRate
|
||||
if diff < 0 {
|
||||
diff = -diff
|
||||
}
|
||||
fillOK := diff < 0.25
|
||||
consistent := bornAtOK && fillOK
|
||||
logger.Debug().
|
||||
Str("witness", ai.ID.String()).
|
||||
Bool("bornAt_ok", bornAtOK).
|
||||
Bool("fill_ok", fillOK).
|
||||
Msg("witness report")
|
||||
results <- result{consistent: consistent}
|
||||
}(ai)
|
||||
}
|
||||
|
||||
checked, consistent := 0, 0
|
||||
for range witnesses {
|
||||
r := <-results
|
||||
checked++
|
||||
if r.consistent {
|
||||
consistent++
|
||||
}
|
||||
}
|
||||
|
||||
if checked == 0 {
|
||||
return
|
||||
}
|
||||
score.witnessChecked += checked
|
||||
score.witnessConsistent += consistent
|
||||
}
|
||||
Reference in New Issue
Block a user