Files
oc-discovery/daemons/node/common/common_heartbeat.go

256 lines
9.5 KiB
Go
Raw Normal View History

2026-03-11 16:28:15 +01:00
package common
import (
"context"
"encoding/json"
"io"
"time"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
pp "github.com/libp2p/go-libp2p/core/peer"
oclib "cloud.o-forge.io/core/oc-lib"
)
type Heartbeat struct {
Name string `json:"name"`
Stream *Stream `json:"stream"`
DID string `json:"did"`
PeerID string `json:"peer_id"`
Timestamp int64 `json:"timestamp"`
IndexersBinded []string `json:"indexers_binded"`
Score float64
// Record carries a fresh signed PeerRecord (JSON) so the receiving indexer
// can republish it to the DHT without an extra round-trip.
// Only set by nodes (not indexers heartbeating other indexers).
Record json.RawMessage `json:"record,omitempty"`
// Need is how many more indexers this node wants (MaxIndexer - current pool size).
// The receiving indexer uses this to know how many suggestions to return.
// 0 means the pool is full — no suggestions needed unless SuggestMigrate.
Need int `json:"need,omitempty"`
// Challenges is a list of PeerIDs the node asks the indexer to spot-check.
// Always includes the node's own PeerID (ground truth) + up to 2 additional
// known peers. Nil means no challenge this tick.
Challenges []string `json:"challenges,omitempty"`
// ChallengeDID asks the indexer to retrieve this DID from the DHT (every 5th batch).
ChallengeDID string `json:"challenge_did,omitempty"`
// Referent marks this indexer as the node's designated search referent.
// Only one indexer per node receives Referent=true at a time (the best-scored one).
// The indexer stores the node in its referencedNodes for distributed search.
Referent bool `json:"referent,omitempty"`
}
// SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer.
// The indexer broadcasts it on the GossipSub search mesh and streams results back.
type SearchPeerRequest struct {
QueryID string `json:"query_id"`
// At least one of PeerID, DID, Name must be set.
PeerID string `json:"peer_id,omitempty"`
DID string `json:"did,omitempty"`
Name string `json:"name,omitempty"`
}
// SearchQuery is broadcast on TopicSearchPeer by the receiving indexer.
// EmitterID is the indexer's own PeerID — responding indexers open a
// ProtocolSearchPeerResponse stream back to it.
type SearchQuery struct {
QueryID string `json:"query_id"`
PeerID string `json:"peer_id,omitempty"`
DID string `json:"did,omitempty"`
Name string `json:"name,omitempty"`
EmitterID string `json:"emitter_id"`
}
// SearchPeerResult is sent by a responding indexer to the emitting indexer
// via ProtocolSearchPeerResponse, and forwarded by the emitting indexer to
// the node on the open ProtocolSearchPeer stream.
type SearchPeerResult struct {
QueryID string `json:"query_id"`
Records []SearchHit `json:"records"`
}
// SearchHit is a single peer found during distributed search.
type SearchHit struct {
PeerID string `json:"peer_id"`
DID string `json:"did"`
Name string `json:"name"`
}
// ChallengeEntry is the indexer's raw answer for one challenged peer.
type ChallengeEntry struct {
PeerID string `json:"peer_id"`
Found bool `json:"found"`
LastSeen time.Time `json:"last_seen,omitempty"` // zero if not found
}
// HeartbeatResponse carries raw metrics only — no pre-cooked score.
type HeartbeatResponse struct {
FillRate float64 `json:"fill_rate"`
PeerCount int `json:"peer_count"`
MaxNodes int `json:"max_nodes"` // capacity — lets node cross-check fillRate
BornAt time.Time `json:"born_at"`
Challenges []ChallengeEntry `json:"challenges,omitempty"`
// DHTFound / DHTPayload: response to a ChallengeDID request.
DHTFound bool `json:"dht_found,omitempty"`
DHTPayload json.RawMessage `json:"dht_payload,omitempty"`
// Witnesses: random sample of connected nodes so the querying node can cross-check.
Witnesses []pp.AddrInfo `json:"witnesses,omitempty"`
// Suggestions: better indexers this indexer knows about via its DHT cache.
// The node should open heartbeat connections to these (they become StaticIndexers).
Suggestions []pp.AddrInfo `json:"suggestions,omitempty"`
// SuggestMigrate: set when this indexer is overloaded (fill rate > threshold)
// and is actively trying to hand the node off to the Suggestions list.
// Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives.
// Non-seeds: node removes this indexer immediately if it has enough alternatives.
SuggestMigrate bool `json:"suggest_migrate,omitempty"`
}
// ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer.
// - uptimeRatio: fraction of tracked lifetime online (gap-aware) — peer reliability
// - bpms: bandwidth normalized to MaxExpectedMbps — link capacity
// - diversity: indexer's own /24 subnet diversity — network topology quality
// - latencyScore: 1 - RTT/maxRoundTrip — link responsiveness
// - fillRate: fraction of indexer slots used (0=empty, 1=full) — collective trust signal:
// a fuller indexer has been chosen and retained by many peers, which is evidence of quality.
func (hb *Heartbeat) ComputeIndexerScore(uptimeRatio float64, bpms float64, diversity float64, latencyScore float64, fillRate float64) {
hb.Score = ((0.20 * uptimeRatio) +
(0.20 * bpms) +
(0.20 * diversity) +
(0.15 * latencyScore) +
(0.25 * fillRate)) * 100
}
type HeartbeatInfo []struct {
Info []byte `json:"info"`
}
// WitnessRequest is sent by a node to a peer to ask its view of a given indexer.
type WitnessRequest struct {
IndexerPeerID string `json:"indexer_peer_id"`
}
// WitnessReport is returned by a peer in response to a WitnessRequest.
type WitnessReport struct {
Seen bool `json:"seen"`
BornAt time.Time `json:"born_at,omitempty"`
FillRate float64 `json:"fill_rate,omitempty"`
Score float64 `json:"score,omitempty"`
}
// HandleBandwidthProbe echoes back everything written on the stream, then closes.
// It is registered by all participants so the measuring side (the heartbeat receiver)
// can open a dedicated probe stream and read the round-trip latency + throughput.
func HandleBandwidthProbe(s network.Stream) {
defer s.Close()
s.SetDeadline(time.Now().Add(10 * time.Second))
io.Copy(s, s) // echo every byte back to the sender
}
// HandleWitnessQuery answers a witness query: the caller wants to know
// what this node thinks of a given indexer (identified by its PeerID).
func HandleWitnessQuery(h host.Host, s network.Stream) {
defer s.Close()
s.SetDeadline(time.Now().Add(5 * time.Second))
var req WitnessRequest
if err := json.NewDecoder(s).Decode(&req); err != nil {
return
}
report := WitnessReport{}
for _, ai := range Indexers.GetAddrs() {
if ai.Info == nil || ai.Info.ID.String() != req.IndexerPeerID {
continue
}
if score := Indexers.GetScore(addrKey(*ai.Info)); score != nil {
report.Seen = true
report.BornAt = score.LastBornAt
report.FillRate = score.LastFillRate
report.Score = score.Score
}
break
}
json.NewEncoder(s).Encode(report)
}
// SupportsHeartbeat probes pid with a short-lived stream to verify it has
// a ProtocolHeartbeat handler (i.e. it is an indexer, not a plain node).
// Only protocol negotiation is performed — no data is sent.
// Returns false on any error, including "protocol not supported".
func SupportsHeartbeat(h host.Host, pid pp.ID) bool {
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
s, err := h.NewStream(ctx, pid, ProtocolHeartbeat)
if err != nil {
return false
}
s.Reset()
return true
}
// queryWitnesses contacts each witness in parallel, collects their view of the
// indexer, and updates score.witnessChecked / score.witnessConsistent.
// Called in a goroutine — must not hold any lock.
func queryWitnesses(h host.Host, indexerPeerID string, indexerBornAt time.Time, indexerFillRate float64, witnesses []pp.AddrInfo, score *Score) {
logger := oclib.GetLogger()
type result struct{ consistent bool }
results := make(chan result, len(witnesses))
for _, ai := range witnesses {
if ai.ID == h.ID() {
// Never query ourselves — skip and count as inconclusive.
results <- result{}
continue
}
go func(ai pp.AddrInfo) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
s, err := h.NewStream(ctx, ai.ID, ProtocolWitnessQuery)
if err != nil {
results <- result{}
return
}
defer s.Close()
s.SetDeadline(time.Now().Add(5 * time.Second))
if err := json.NewEncoder(s).Encode(WitnessRequest{IndexerPeerID: indexerPeerID}); err != nil {
results <- result{}
return
}
var rep WitnessReport
if err := json.NewDecoder(s).Decode(&rep); err != nil || !rep.Seen {
results <- result{}
return
}
// BornAt must be identical (fixed timestamp).
bornAtOK := !rep.BornAt.IsZero() && rep.BornAt.Equal(indexerBornAt)
// FillRate coherent within ±25% (it fluctuates normally).
diff := rep.FillRate - indexerFillRate
if diff < 0 {
diff = -diff
}
fillOK := diff < 0.25
consistent := bornAtOK && fillOK
logger.Debug().
Str("witness", ai.ID.String()).
Bool("bornAt_ok", bornAtOK).
Bool("fill_ok", fillOK).
Msg("witness report")
results <- result{consistent: consistent}
}(ai)
}
checked, consistent := 0, 0
for range witnesses {
r := <-results
checked++
if r.consistent {
consistent++
}
}
if checked == 0 {
return
}
score.witnessChecked += checked
score.witnessConsistent += consistent
}