Simple Architecture
This commit is contained in:
@@ -10,7 +10,6 @@ type Config struct {
|
|||||||
PrivateKeyPath string
|
PrivateKeyPath string
|
||||||
NodeEndpointPort int64
|
NodeEndpointPort int64
|
||||||
IndexerAddresses string
|
IndexerAddresses string
|
||||||
NativeIndexerAddresses string // multiaddrs of native indexers, comma-separated; bypasses IndexerAddresses when set
|
|
||||||
|
|
||||||
PeerIDS string // TO REMOVE
|
PeerIDS string // TO REMOVE
|
||||||
|
|
||||||
@@ -18,11 +17,19 @@ type Config struct {
|
|||||||
|
|
||||||
MinIndexer int
|
MinIndexer int
|
||||||
MaxIndexer int
|
MaxIndexer int
|
||||||
|
// SearchTimeout is the max duration without a new result before the
|
||||||
|
// distributed peer search stream is closed. Default: 5s.
|
||||||
|
SearchTimeout int // seconds; 0 → use default (5)
|
||||||
|
|
||||||
// ConsensusQuorum is the minimum fraction of natives that must agree for a
|
// Indexer connection burst guard: max new connections accepted within the window.
|
||||||
// candidate indexer to be considered confirmed. Range (0, 1]. Default 0.5
|
// 0 → use defaults (20 new peers per 30s).
|
||||||
// (strict majority). Raise to 0.67 for stronger Byzantine resistance.
|
MaxConnPerWindow int // default 20
|
||||||
ConsensusQuorum float64
|
ConnWindowSecs int // default 30
|
||||||
|
|
||||||
|
// Per-node behavioral limits (sliding 60s window). 0 → use built-in defaults.
|
||||||
|
MaxHBPerMinute int // default 5
|
||||||
|
MaxPublishPerMinute int // default 10
|
||||||
|
MaxGetPerMinute int // default 50
|
||||||
}
|
}
|
||||||
|
|
||||||
var instance *Config
|
var instance *Config
|
||||||
|
|||||||
331
daemons/node/common/common_cache.go
Normal file
331
daemons/node/common/common_cache.go
Normal file
@@ -0,0 +1,331 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
"github.com/libp2p/go-libp2p/core/protocol"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Score struct {
|
||||||
|
FirstContacted time.Time
|
||||||
|
UptimeTracker *UptimeTracker
|
||||||
|
LastFillRate float64
|
||||||
|
Score float64
|
||||||
|
// IsSeed marks indexers that came from the IndexerAddresses static config.
|
||||||
|
// Seeds are sticky: they are never evicted by the score threshold alone.
|
||||||
|
// A seed is only removed when: (a) heartbeat fails, or (b) it sends
|
||||||
|
// SuggestMigrate and the node already has MinIndexer non-seed alternatives.
|
||||||
|
IsSeed bool
|
||||||
|
// challenge bookkeeping (2-3 peers per batch, raw data returned by indexer)
|
||||||
|
hbCount int // heartbeats sent since last challenge batch
|
||||||
|
nextChallenge int // send challenges when hbCount reaches this (rand 1-10)
|
||||||
|
challengeTotal int // number of own-PeerID challenges sent (ground truth)
|
||||||
|
challengeCorrect int // own PeerID found AND lastSeen within 2×interval
|
||||||
|
// fill rate consistency: cross-check reported fillRate vs peerCount/maxNodes
|
||||||
|
fillChecked int
|
||||||
|
fillConsistent int
|
||||||
|
// BornAt stability
|
||||||
|
LastBornAt time.Time
|
||||||
|
bornAtChanges int
|
||||||
|
// DHT challenge
|
||||||
|
dhtChecked int
|
||||||
|
dhtSuccess int
|
||||||
|
dhtBatchCounter int
|
||||||
|
// Peer witnesses
|
||||||
|
witnessChecked int
|
||||||
|
witnessConsistent int
|
||||||
|
// WitnessPool: up to 3 witnesses last reported by this indexer.
|
||||||
|
// Used for indirect probing when the indexer becomes unreachable.
|
||||||
|
// Oldest entry is replaced when the pool is full and a fresher witness arrives.
|
||||||
|
WitnessPool []WitnessCacheEntry
|
||||||
|
}
|
||||||
|
|
||||||
|
// WitnessCacheEntry holds one witness AddrInfo with its last-seen timestamp.
|
||||||
|
const maxWitnessPool = 3
|
||||||
|
|
||||||
|
type WitnessCacheEntry struct {
|
||||||
|
AI pp.AddrInfo
|
||||||
|
SeenAt time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateWitnessPool inserts or refreshes a witness entry.
|
||||||
|
// If the pool is full and the witness is new, the oldest entry is replaced.
|
||||||
|
func (s *Score) UpdateWitnessPool(w pp.AddrInfo) {
|
||||||
|
for i, e := range s.WitnessPool {
|
||||||
|
if e.AI.ID == w.ID {
|
||||||
|
s.WitnessPool[i].AI = w
|
||||||
|
s.WitnessPool[i].SeenAt = time.Now()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
entry := WitnessCacheEntry{AI: w, SeenAt: time.Now()}
|
||||||
|
if len(s.WitnessPool) < maxWitnessPool {
|
||||||
|
s.WitnessPool = append(s.WitnessPool, entry)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Replace oldest.
|
||||||
|
oldest := 0
|
||||||
|
for i, e := range s.WitnessPool {
|
||||||
|
if e.SeenAt.Before(s.WitnessPool[oldest].SeenAt) {
|
||||||
|
oldest = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.WitnessPool[oldest] = entry
|
||||||
|
}
|
||||||
|
|
||||||
|
// computeNodeSideScore computes the node's quality assessment of an indexer from raw metrics.
|
||||||
|
// All ratios are in [0,1]; result is in [0,100].
|
||||||
|
// - uptimeRatio : gap-aware fraction of lifetime the indexer was reachable
|
||||||
|
// - challengeAccuracy: own-PeerID challenges answered correctly (found + recent lastSeen)
|
||||||
|
// - latencyScore : 1 - RTT/maxRTT, clamped [0,1]
|
||||||
|
// - fillScore : 1 - fillRate — prefer less-loaded indexers
|
||||||
|
// - fillConsistency : fraction of ticks where peerCount/maxNodes ≈ fillRate (±10%)
|
||||||
|
func (s *Score) ComputeNodeSideScore(latencyScore float64) float64 {
|
||||||
|
uptime := s.UptimeTracker.UptimeRatio()
|
||||||
|
challengeAccuracy := 1.0
|
||||||
|
if s.challengeTotal > 0 {
|
||||||
|
challengeAccuracy = float64(s.challengeCorrect) / float64(s.challengeTotal)
|
||||||
|
}
|
||||||
|
fillScore := 1.0 - s.LastFillRate
|
||||||
|
fillConsistency := 1.0
|
||||||
|
if s.fillChecked > 0 {
|
||||||
|
fillConsistency = float64(s.fillConsistent) / float64(s.fillChecked)
|
||||||
|
}
|
||||||
|
witnessConsistency := 1.0
|
||||||
|
if s.witnessChecked > 0 {
|
||||||
|
witnessConsistency = float64(s.witnessConsistent) / float64(s.witnessChecked)
|
||||||
|
}
|
||||||
|
dhtSuccessRate := 1.0
|
||||||
|
if s.dhtChecked > 0 {
|
||||||
|
dhtSuccessRate = float64(s.dhtSuccess) / float64(s.dhtChecked)
|
||||||
|
}
|
||||||
|
base := ((0.20 * uptime) +
|
||||||
|
(0.20 * challengeAccuracy) +
|
||||||
|
(0.15 * latencyScore) +
|
||||||
|
(0.10 * fillScore) +
|
||||||
|
(0.10 * fillConsistency) +
|
||||||
|
(0.15 * witnessConsistency) +
|
||||||
|
(0.10 * dhtSuccessRate)) * 100
|
||||||
|
// BornAt stability: each unexpected BornAt change penalises by 30%.
|
||||||
|
bornAtPenalty := 1.0 - 0.30*float64(s.bornAtChanges)
|
||||||
|
if bornAtPenalty < 0 {
|
||||||
|
bornAtPenalty = 0
|
||||||
|
}
|
||||||
|
return base * bornAtPenalty
|
||||||
|
}
|
||||||
|
|
||||||
|
type Directory struct {
|
||||||
|
MuAddr sync.RWMutex
|
||||||
|
MuScore sync.RWMutex
|
||||||
|
MuStream sync.RWMutex
|
||||||
|
Addrs map[string]*pp.AddrInfo
|
||||||
|
Scores map[string]*Score
|
||||||
|
Nudge chan struct{}
|
||||||
|
Streams ProtocolStream
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) ExistsScore(a string) bool {
|
||||||
|
d.MuScore.RLock()
|
||||||
|
defer d.MuScore.RUnlock()
|
||||||
|
for addr, ai := range d.Scores {
|
||||||
|
if ai != nil && (a == addr) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) GetScore(a string) *Score {
|
||||||
|
d.MuScore.RLock()
|
||||||
|
defer d.MuScore.RUnlock()
|
||||||
|
for addr, s := range d.Scores {
|
||||||
|
if s != nil && (a == addr) {
|
||||||
|
sCopy := *s
|
||||||
|
return &sCopy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) GetScores() map[string]*Score {
|
||||||
|
d.MuScore.RLock()
|
||||||
|
defer d.MuScore.RUnlock()
|
||||||
|
score := map[string]*Score{}
|
||||||
|
for addr, s := range d.Scores {
|
||||||
|
score[addr] = s
|
||||||
|
}
|
||||||
|
return score
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) DeleteScore(a string) {
|
||||||
|
d.MuScore.RLock()
|
||||||
|
defer d.MuScore.RUnlock()
|
||||||
|
score := map[string]*Score{}
|
||||||
|
for addr, s := range d.Scores {
|
||||||
|
if a != addr {
|
||||||
|
score[addr] = s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.Scores = score
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) SetScore(addr string, score *Score) *pp.AddrInfo {
|
||||||
|
d.MuScore.Lock()
|
||||||
|
defer d.MuScore.Unlock()
|
||||||
|
d.Scores[addr] = score
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) ExistsAddr(addrOrId string) bool {
|
||||||
|
d.MuAddr.RLock()
|
||||||
|
defer d.MuAddr.RUnlock()
|
||||||
|
for addr, ai := range d.Addrs {
|
||||||
|
if ai != nil && (addrOrId == ai.ID.String() || addrOrId == addr) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) GetAddr(addrOrId string) *pp.AddrInfo {
|
||||||
|
d.MuAddr.RLock()
|
||||||
|
defer d.MuAddr.RUnlock()
|
||||||
|
for addr, ai := range d.Addrs {
|
||||||
|
if ai != nil && (addrOrId == ai.ID.String() || addrOrId == addr) {
|
||||||
|
aiCopy := *ai
|
||||||
|
return &aiCopy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) DeleteAddr(a string) {
|
||||||
|
d.MuAddr.RLock()
|
||||||
|
defer d.MuAddr.RUnlock()
|
||||||
|
addrs := map[string]*pp.AddrInfo{}
|
||||||
|
for addr, s := range d.Addrs {
|
||||||
|
if a != addr {
|
||||||
|
addrs[addr] = s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.Addrs = addrs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) SetAddr(addr string, info *pp.AddrInfo) *pp.AddrInfo {
|
||||||
|
d.MuAddr.Lock()
|
||||||
|
defer d.MuAddr.Unlock()
|
||||||
|
d.Addrs[addr] = info
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) GetAddrIDs() []pp.ID {
|
||||||
|
d.MuAddr.RLock()
|
||||||
|
defer d.MuAddr.RUnlock()
|
||||||
|
indexers := make([]pp.ID, 0, len(d.Addrs))
|
||||||
|
for _, ai := range d.Addrs {
|
||||||
|
if ai != nil {
|
||||||
|
indexers = append(indexers, ai.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Shuffle(indexers)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) GetAddrsStr() []string {
|
||||||
|
d.MuAddr.RLock()
|
||||||
|
defer d.MuAddr.RUnlock()
|
||||||
|
indexers := make([]string, 0, len(d.Addrs))
|
||||||
|
for s, ai := range d.Addrs {
|
||||||
|
if ai != nil {
|
||||||
|
indexers = append(indexers, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Shuffle(indexers)
|
||||||
|
}
|
||||||
|
|
||||||
|
type Entry struct {
|
||||||
|
Addr string
|
||||||
|
Info *pp.AddrInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Directory) GetAddrs() []Entry {
|
||||||
|
d.MuAddr.RLock()
|
||||||
|
defer d.MuAddr.RUnlock()
|
||||||
|
indexers := make([]Entry, 0, len(d.Addrs))
|
||||||
|
for addr, ai := range d.Addrs {
|
||||||
|
if ai != nil {
|
||||||
|
indexers = append(indexers, Entry{
|
||||||
|
Addr: addr,
|
||||||
|
Info: ai,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Shuffle(indexers)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NudgeIndexerHeartbeat signals the indexer heartbeat goroutine to fire immediately.
|
||||||
|
func (d *Directory) NudgeIt() {
|
||||||
|
select {
|
||||||
|
case d.Nudge <- struct{}{}:
|
||||||
|
default: // nudge already pending, skip
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ProtocolStream map[protocol.ID]map[pp.ID]*Stream
|
||||||
|
|
||||||
|
func (ps ProtocolStream) Get(protocol protocol.ID) map[pp.ID]*Stream {
|
||||||
|
if ps[protocol] == nil {
|
||||||
|
ps[protocol] = map[pp.ID]*Stream{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ps[protocol]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ps ProtocolStream) GetPerID(protocol protocol.ID, peerID pp.ID) *Stream {
|
||||||
|
if ps[protocol] == nil {
|
||||||
|
ps[protocol] = map[pp.ID]*Stream{}
|
||||||
|
}
|
||||||
|
return ps[protocol][peerID]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ps ProtocolStream) Add(protocol protocol.ID, peerID *pp.ID, s *Stream) error {
|
||||||
|
if ps[protocol] == nil {
|
||||||
|
ps[protocol] = map[pp.ID]*Stream{}
|
||||||
|
}
|
||||||
|
if peerID != nil {
|
||||||
|
if s != nil {
|
||||||
|
ps[protocol][*peerID] = s
|
||||||
|
} else {
|
||||||
|
return errors.New("unable to add stream : stream missing")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ps ProtocolStream) Delete(protocol protocol.ID, peerID *pp.ID) {
|
||||||
|
if streams, ok := ps[protocol]; ok {
|
||||||
|
if peerID != nil && streams[*peerID] != nil && streams[*peerID].Stream != nil {
|
||||||
|
streams[*peerID].Stream.Close()
|
||||||
|
delete(streams, *peerID)
|
||||||
|
} else {
|
||||||
|
for _, s := range ps {
|
||||||
|
for _, v := range s {
|
||||||
|
if v.Stream != nil {
|
||||||
|
v.Stream.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete(ps, protocol)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var Indexers = &Directory{
|
||||||
|
Addrs: map[string]*pp.AddrInfo{},
|
||||||
|
Scores: map[string]*Score{},
|
||||||
|
Nudge: make(chan struct{}, 1),
|
||||||
|
Streams: ProtocolStream{},
|
||||||
|
}
|
||||||
295
daemons/node/common/common_heartbeat.go
Normal file
295
daemons/node/common/common_heartbeat.go
Normal file
@@ -0,0 +1,295 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/libp2p/go-libp2p/core/host"
|
||||||
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
|
||||||
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Heartbeat struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Stream *Stream `json:"stream"`
|
||||||
|
DID string `json:"did"`
|
||||||
|
PeerID string `json:"peer_id"`
|
||||||
|
Timestamp int64 `json:"timestamp"`
|
||||||
|
IndexersBinded []string `json:"indexers_binded"`
|
||||||
|
Score float64
|
||||||
|
// Record carries a fresh signed PeerRecord (JSON) so the receiving indexer
|
||||||
|
// can republish it to the DHT without an extra round-trip.
|
||||||
|
// Only set by nodes (not indexers heartbeating other indexers).
|
||||||
|
Record json.RawMessage `json:"record,omitempty"`
|
||||||
|
// Need is how many more indexers this node wants (MaxIndexer - current pool size).
|
||||||
|
// The receiving indexer uses this to know how many suggestions to return.
|
||||||
|
// 0 means the pool is full — no suggestions needed unless SuggestMigrate.
|
||||||
|
Need int `json:"need,omitempty"`
|
||||||
|
// Challenges is a list of PeerIDs the node asks the indexer to spot-check.
|
||||||
|
// Always includes the node's own PeerID (ground truth) + up to 2 additional
|
||||||
|
// known peers. Nil means no challenge this tick.
|
||||||
|
Challenges []string `json:"challenges,omitempty"`
|
||||||
|
// ChallengeDID asks the indexer to retrieve this DID from the DHT (every 5th batch).
|
||||||
|
ChallengeDID string `json:"challenge_did,omitempty"`
|
||||||
|
// Referent marks this indexer as the node's designated search referent.
|
||||||
|
// Only one indexer per node receives Referent=true at a time (the best-scored one).
|
||||||
|
// The indexer stores the node in its referencedNodes for distributed search.
|
||||||
|
Referent bool `json:"referent,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer.
// The indexer broadcasts it on the GossipSub search mesh and streams results back.
type SearchPeerRequest struct {
	QueryID string `json:"query_id"`
	// At least one of PeerID, DID, Name must be set.
	PeerID string `json:"peer_id,omitempty"`
	DID    string `json:"did,omitempty"`
	Name   string `json:"name,omitempty"`
}
|
||||||
|
|
||||||
|
// SearchQuery is broadcast on TopicSearchPeer by the receiving indexer.
// EmitterID is the indexer's own PeerID — responding indexers open a
// ProtocolSearchPeerResponse stream back to it.
type SearchQuery struct {
	QueryID   string `json:"query_id"`
	PeerID    string `json:"peer_id,omitempty"`
	DID       string `json:"did,omitempty"`
	Name      string `json:"name,omitempty"`
	EmitterID string `json:"emitter_id"`
}
|
||||||
|
|
||||||
|
// SearchPeerResult is sent by a responding indexer to the emitting indexer
|
||||||
|
// via ProtocolSearchPeerResponse, and forwarded by the emitting indexer to
|
||||||
|
// the node on the open ProtocolSearchPeer stream.
|
||||||
|
type SearchPeerResult struct {
|
||||||
|
QueryID string `json:"query_id"`
|
||||||
|
Records []SearchHit `json:"records"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchHit is a single peer found during distributed search.
type SearchHit struct {
	PeerID string `json:"peer_id"`
	DID    string `json:"did"`
	Name   string `json:"name"`
}
|
||||||
|
|
||||||
|
// ChallengeEntry is the indexer's raw answer for one challenged peer.
type ChallengeEntry struct {
	PeerID   string    `json:"peer_id"`
	Found    bool      `json:"found"`
	LastSeen time.Time `json:"last_seen,omitempty"` // zero value when not found
}
|
||||||
|
|
||||||
|
// HeartbeatResponse carries raw metrics only — no pre-cooked score.
|
||||||
|
type HeartbeatResponse struct {
|
||||||
|
FillRate float64 `json:"fill_rate"`
|
||||||
|
PeerCount int `json:"peer_count"`
|
||||||
|
MaxNodes int `json:"max_nodes"` // capacity — lets node cross-check fillRate
|
||||||
|
BornAt time.Time `json:"born_at"`
|
||||||
|
Challenges []ChallengeEntry `json:"challenges,omitempty"`
|
||||||
|
// DHTFound / DHTPayload: response to a ChallengeDID request.
|
||||||
|
DHTFound bool `json:"dht_found,omitempty"`
|
||||||
|
DHTPayload json.RawMessage `json:"dht_payload,omitempty"`
|
||||||
|
// Witnesses: random sample of connected nodes so the querying node can cross-check.
|
||||||
|
Witnesses []pp.AddrInfo `json:"witnesses,omitempty"`
|
||||||
|
// Suggestions: better indexers this indexer knows about via its DHT cache.
|
||||||
|
// The node should open heartbeat connections to these (they become StaticIndexers).
|
||||||
|
Suggestions []pp.AddrInfo `json:"suggestions,omitempty"`
|
||||||
|
// SuggestMigrate: set when this indexer is overloaded (fill rate > threshold)
|
||||||
|
// and is actively trying to hand the node off to the Suggestions list.
|
||||||
|
// Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives.
|
||||||
|
// Non-seeds: node removes this indexer immediately if it has enough alternatives.
|
||||||
|
SuggestMigrate bool `json:"suggest_migrate,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer.
|
||||||
|
// - uptimeRatio: fraction of tracked lifetime online (gap-aware) — peer reliability
|
||||||
|
// - bpms: bandwidth normalized to MaxExpectedMbps — link capacity
|
||||||
|
// - diversity: indexer's own /24 subnet diversity — network topology quality
|
||||||
|
// - latencyScore: 1 - RTT/maxRoundTrip — link responsiveness
|
||||||
|
// - fillRate: fraction of indexer slots used (0=empty, 1=full) — collective trust signal:
|
||||||
|
// a fuller indexer has been chosen and retained by many peers, which is evidence of quality.
|
||||||
|
func (hb *Heartbeat) ComputeIndexerScore(uptimeRatio float64, bpms float64, diversity float64, latencyScore float64, fillRate float64) {
|
||||||
|
hb.Score = ((0.20 * uptimeRatio) +
|
||||||
|
(0.20 * bpms) +
|
||||||
|
(0.20 * diversity) +
|
||||||
|
(0.15 * latencyScore) +
|
||||||
|
(0.25 * fillRate)) * 100
|
||||||
|
}
|
||||||
|
|
||||||
|
// HeartbeatInfo is a list of opaque per-entry payloads.
type HeartbeatInfo []struct {
	Info []byte `json:"info"`
}
|
||||||
|
|
||||||
|
// WitnessRequest is sent by a node to a peer to ask its view of a given indexer.
type WitnessRequest struct {
	IndexerPeerID string `json:"indexer_peer_id"`
}
|
||||||
|
|
||||||
|
// WitnessReport is returned by a peer in response to a WitnessRequest.
// When Seen is false the remaining fields are zero values.
type WitnessReport struct {
	Seen     bool      `json:"seen"`
	BornAt   time.Time `json:"born_at,omitempty"`
	FillRate float64   `json:"fill_rate,omitempty"`
	Score    float64   `json:"score,omitempty"`
}
|
||||||
|
|
||||||
|
// HandleBandwidthProbe echoes back everything written on the stream, then closes.
|
||||||
|
// It is registered by all participants so the measuring side (the heartbeat receiver)
|
||||||
|
// can open a dedicated probe stream and read the round-trip latency + throughput.
|
||||||
|
func HandleBandwidthProbe(s network.Stream) {
|
||||||
|
defer s.Close()
|
||||||
|
s.SetDeadline(time.Now().Add(10 * time.Second))
|
||||||
|
io.Copy(s, s) // echo every byte back to the sender
|
||||||
|
}
|
||||||
|
|
||||||
|
// HandleWitnessQuery answers a witness query: the caller wants to know
|
||||||
|
// what this node thinks of a given indexer (identified by its PeerID).
|
||||||
|
func HandleWitnessQuery(h host.Host, s network.Stream) {
|
||||||
|
defer s.Close()
|
||||||
|
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
var req WitnessRequest
|
||||||
|
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
report := WitnessReport{}
|
||||||
|
for _, ai := range Indexers.GetAddrs() {
|
||||||
|
if ai.Info == nil || ai.Info.ID.String() != req.IndexerPeerID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if score := Indexers.GetScore(addrKey(*ai.Info)); score != nil {
|
||||||
|
report.Seen = true
|
||||||
|
report.BornAt = score.LastBornAt
|
||||||
|
report.FillRate = score.LastFillRate
|
||||||
|
report.Score = score.Score
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
json.NewEncoder(s).Encode(report)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndirectProbeIndexer asks each witness in the cache whether it still sees
|
||||||
|
// the given indexer (by PeerID). Returns true if at least one witness confirms
|
||||||
|
// it is alive — meaning our direct link is asymmetrically broken, not the indexer.
|
||||||
|
// All probes run in parallel; the function blocks at most 5 seconds.
|
||||||
|
func IndirectProbeIndexer(h host.Host, indexerPeerID string, pool []WitnessCacheEntry) bool {
|
||||||
|
if len(pool) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
results := make(chan bool, len(pool))
|
||||||
|
for _, e := range pool {
|
||||||
|
go func(ai pp.AddrInfo) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
s, err := h.NewStream(ctx, ai.ID, ProtocolWitnessQuery)
|
||||||
|
if err != nil {
|
||||||
|
results <- false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer s.Reset()
|
||||||
|
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
if err := json.NewEncoder(s).Encode(WitnessRequest{IndexerPeerID: indexerPeerID}); err != nil {
|
||||||
|
results <- false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var rep WitnessReport
|
||||||
|
if err := json.NewDecoder(s).Decode(&rep); err != nil {
|
||||||
|
results <- false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
results <- rep.Seen
|
||||||
|
}(e.AI)
|
||||||
|
}
|
||||||
|
for range pool {
|
||||||
|
if <-results {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// SupportsHeartbeat probes pid with a short-lived stream to verify it has
|
||||||
|
// a ProtocolHeartbeat handler (i.e. it is an indexer, not a plain node).
|
||||||
|
// Only protocol negotiation is performed — no data is sent.
|
||||||
|
// Returns false on any error, including "protocol not supported".
|
||||||
|
func SupportsHeartbeat(h host.Host, pid pp.ID) bool {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
s, err := h.NewStream(ctx, pid, ProtocolHeartbeat)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
s.Reset()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// queryWitnesses contacts each witness in parallel, collects their view of the
|
||||||
|
// indexer, and updates score.witnessChecked / score.witnessConsistent.
|
||||||
|
// Called in a goroutine — must not hold any lock.
|
||||||
|
func queryWitnesses(h host.Host, indexerPeerID string, indexerBornAt time.Time, indexerFillRate float64, witnesses []pp.AddrInfo, score *Score) {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
type result struct{ consistent bool }
|
||||||
|
results := make(chan result, len(witnesses))
|
||||||
|
|
||||||
|
for _, ai := range witnesses {
|
||||||
|
if ai.ID == h.ID() {
|
||||||
|
// Never query ourselves — skip and count as inconclusive.
|
||||||
|
results <- result{}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
go func(ai pp.AddrInfo) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
s, err := h.NewStream(ctx, ai.ID, ProtocolWitnessQuery)
|
||||||
|
if err != nil {
|
||||||
|
results <- result{}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer s.Close()
|
||||||
|
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
if err := json.NewEncoder(s).Encode(WitnessRequest{IndexerPeerID: indexerPeerID}); err != nil {
|
||||||
|
results <- result{}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var rep WitnessReport
|
||||||
|
if err := json.NewDecoder(s).Decode(&rep); err != nil || !rep.Seen {
|
||||||
|
results <- result{}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// BornAt must be identical (fixed timestamp).
|
||||||
|
bornAtOK := !rep.BornAt.IsZero() && rep.BornAt.Equal(indexerBornAt)
|
||||||
|
// FillRate coherent within ±25% (it fluctuates normally).
|
||||||
|
diff := rep.FillRate - indexerFillRate
|
||||||
|
if diff < 0 {
|
||||||
|
diff = -diff
|
||||||
|
}
|
||||||
|
fillOK := diff < 0.25
|
||||||
|
consistent := bornAtOK && fillOK
|
||||||
|
logger.Debug().
|
||||||
|
Str("witness", ai.ID.String()).
|
||||||
|
Bool("bornAt_ok", bornAtOK).
|
||||||
|
Bool("fill_ok", fillOK).
|
||||||
|
Msg("witness report")
|
||||||
|
results <- result{consistent: consistent}
|
||||||
|
}(ai)
|
||||||
|
}
|
||||||
|
|
||||||
|
checked, consistent := 0, 0
|
||||||
|
for range witnesses {
|
||||||
|
r := <-results
|
||||||
|
checked++
|
||||||
|
if r.consistent {
|
||||||
|
consistent++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if checked == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
score.witnessChecked += checked
|
||||||
|
score.witnessConsistent += consistent
|
||||||
|
}
|
||||||
588
daemons/node/common/common_indexer_hb.go
Normal file
588
daemons/node/common/common_indexer_hb.go
Normal file
@@ -0,0 +1,588 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"oc-discovery/conf"
|
||||||
|
|
||||||
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
|
|
||||||
|
"github.com/libp2p/go-libp2p/core/host"
|
||||||
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
"github.com/libp2p/go-libp2p/core/protocol"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TimeWatcher records (in UTC) when ConnectToIndexers last started.
var TimeWatcher time.Time

// retryRunning guards against launching multiple retryUntilSeedResponds goroutines.
var retryRunning atomic.Bool
|
||||||
|
|
||||||
|
func ConnectToIndexers(h host.Host, minIndexer int, maxIndexer int, recordFn ...func() json.RawMessage) error {
|
||||||
|
TimeWatcher = time.Now().UTC()
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
|
||||||
|
// Bootstrap from IndexerAddresses seed set.
|
||||||
|
addresses := strings.Split(conf.GetConfig().IndexerAddresses, ",")
|
||||||
|
if len(addresses) > maxIndexer {
|
||||||
|
addresses = addresses[0:maxIndexer]
|
||||||
|
}
|
||||||
|
for _, indexerAddr := range addresses {
|
||||||
|
indexerAddr = strings.TrimSpace(indexerAddr)
|
||||||
|
if indexerAddr == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ad, err := pp.AddrInfoFromString(indexerAddr)
|
||||||
|
if err != nil {
|
||||||
|
logger.Err(err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key := ad.ID.String()
|
||||||
|
Indexers.SetAddr(key, ad)
|
||||||
|
// Pre-create score entry with IsSeed=true so the sticky flag is set before
|
||||||
|
// the first heartbeat tick (lazy creation in doTick would lose the flag).
|
||||||
|
if !Indexers.ExistsScore(key) {
|
||||||
|
Indexers.SetScore(key, &Score{
|
||||||
|
FirstContacted: time.Now().UTC(),
|
||||||
|
UptimeTracker: &UptimeTracker{FirstSeen: time.Now().UTC()},
|
||||||
|
nextChallenge: rand.Intn(10) + 1,
|
||||||
|
IsSeed: true,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seeds := Indexers.GetAddrs()
|
||||||
|
indexerCount := len(seeds)
|
||||||
|
|
||||||
|
if indexerCount < minIndexer {
|
||||||
|
return fmt.Errorf("you run a node without indexers... your gonna be isolated.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start long-lived heartbeat to seed indexers. The single goroutine follows
|
||||||
|
// all subsequent StaticIndexers changes.
|
||||||
|
SendHeartbeat(context.Background(), ProtocolHeartbeat, conf.GetConfig().Name,
|
||||||
|
h, Indexers, 20*time.Second, maxIndexer, recordFn...)
|
||||||
|
|
||||||
|
// Watch for inbound connections: if a peer connects to us and our pool has
|
||||||
|
// room, probe it first to confirm it supports ProtocolHeartbeat (i.e. it is
|
||||||
|
// an indexer). Plain nodes don't register the handler — the negotiation fails
|
||||||
|
// instantly so we never pollute the pool with non-indexer peers.
|
||||||
|
h.Network().Notify(&network.NotifyBundle{
|
||||||
|
ConnectedF: func(n network.Network, c network.Conn) {
|
||||||
|
if c.Stat().Direction != network.DirInbound {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(Indexers.GetAddrs()) >= maxIndexer {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
peerID := c.RemotePeer()
|
||||||
|
if Indexers.ExistsAddr(peerID.String()) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Probe in a goroutine — ConnectedF must not block.
|
||||||
|
go func(pid pp.ID) {
|
||||||
|
if !SupportsHeartbeat(h, pid) {
|
||||||
|
return // plain node, skip
|
||||||
|
}
|
||||||
|
if len(Indexers.GetAddrs()) >= maxIndexer {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if Indexers.ExistsAddr(pid.String()) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
addrs := h.Peerstore().Addrs(pid)
|
||||||
|
if len(addrs) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ai := FilterLoopbackAddrs(pp.AddrInfo{ID: pid, Addrs: addrs})
|
||||||
|
if len(ai.Addrs) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
adCopy := ai
|
||||||
|
Indexers.SetAddr(pid.String(), &adCopy)
|
||||||
|
Indexers.NudgeIt()
|
||||||
|
log := oclib.GetLogger()
|
||||||
|
log.Info().Str("peer", pid.String()).
|
||||||
|
Msg("[pool] inbound indexer peer added as candidate")
|
||||||
|
}(peerID)
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
// Proactive DHT upgrade: once seeds are connected and the DHT routing table
|
||||||
|
// is warm, discover better indexers and add them to the pool alongside the seeds.
|
||||||
|
// Seeds stay as guaranteed anchors; scoring will demote poor performers over time.
|
||||||
|
go func(seeds []Entry) {
|
||||||
|
// Let seed connections establish and the DHT routing table warm up.
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
// For pure nodes (no IndexerService), spin up a lightweight DHT client.
|
||||||
|
if discoveryDHT == nil {
|
||||||
|
if len(seeds) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
initNodeDHT(h, seeds)
|
||||||
|
}
|
||||||
|
if discoveryDHT == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
current := len(Indexers.GetAddrs())
|
||||||
|
need := maxIndexer - current
|
||||||
|
if need <= 0 {
|
||||||
|
need = maxIndexer / 2 // diversify even when pool is already at capacity
|
||||||
|
}
|
||||||
|
logger.Info().Int("need", need).Msg("[dht] proactive indexer discovery from DHT")
|
||||||
|
replenishIndexersFromDHT(h, need)
|
||||||
|
}(seeds)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// reconnectToSeeds re-adds the configured seed indexers to StaticIndexers as
|
||||||
|
// sticky fallback entries. Called when the pool drops to zero so the node
|
||||||
|
// never becomes completely isolated.
|
||||||
|
func reconnectToSeeds() {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
logger.Warn().Msg("[pool] all indexers lost, reconnecting to configured seeds")
|
||||||
|
addresses := strings.Split(conf.GetConfig().IndexerAddresses, ",")
|
||||||
|
for _, addrStr := range addresses {
|
||||||
|
addrStr = strings.TrimSpace(addrStr)
|
||||||
|
if addrStr == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ad, err := pp.AddrInfoFromString(addrStr)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key := ad.ID.String()
|
||||||
|
Indexers.SetAddr(key, ad)
|
||||||
|
if score := Indexers.GetScore(key); score == nil {
|
||||||
|
Indexers.SetScore(key, &Score{
|
||||||
|
FirstContacted: time.Now().UTC(),
|
||||||
|
UptimeTracker: &UptimeTracker{FirstSeen: time.Now().UTC()},
|
||||||
|
nextChallenge: rand.Intn(10) + 1,
|
||||||
|
IsSeed: true,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
// Restore sticky flag so the seed is not immediately re-ejected.
|
||||||
|
score.IsSeed = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// retryUntilSeedResponds blocks and retries the configured seed indexers with
// exponential backoff (10s doubling up to a 5m cap) until the pool is
// non-empty again, then nudges the heartbeat loop and re-bootstraps the DHT.
// Should be called in a goroutine — it blocks until the situation resolves.
// At most one retry loop runs at a time (guarded by a CAS on retryRunning).
// NOTE(review): contrary to an earlier doc claim, this does NOT panic when no
// seeds are configured — it logs a warning and keeps waiting for the
// inbound-connection path to repopulate the pool.
func retryUntilSeedResponds() {
	if !retryRunning.CompareAndSwap(false, true) {
		return // another goroutine is already running the retry loop
	}
	defer retryRunning.Store(false)

	logger := oclib.GetLogger()
	rawAddresses := strings.TrimSpace(conf.GetConfig().IndexerAddresses)
	if rawAddresses == "" {
		// No seeds configured: rely on the inbound-connection notifee to fill
		// the pool. Just wait patiently — the loop below will return as soon
		// as any peer connects and NudgeIt() is called.
		logger.Warn().Msg("[pool] pool empty and no seeds configured — waiting for inbound indexer")
	}
	backoff := 10 * time.Second
	const maxBackoff = 5 * time.Minute
	for {
		time.Sleep(backoff)
		// Double before logging, so the logged value is the NEXT sleep.
		if backoff < maxBackoff {
			backoff *= 2
		}
		// Check whether someone else already refilled the pool.
		if len(Indexers.GetAddrs()) > 0 {
			logger.Info().Msg("[pool] pool refilled externally, stopping seed retry")
			return
		}
		logger.Warn().Dur("backoff", backoff).Msg("[pool] still isolated, retrying seeds")
		reconnectToSeeds()
		if len(Indexers.GetAddrs()) > 0 {
			Indexers.NudgeIt()
			// Re-bootstrap DHT now that we have at least one connection candidate.
			if discoveryDHT != nil {
				ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
				discoveryDHT.Bootstrap(ctx) //nolint:errcheck
				cancel()
			}
			return
		}
	}
}
|
||||||
|
|
||||||
|
// ensureScore returns the Score for addr, creating it if absent.
|
||||||
|
func ensureScore(d *Directory, addr string) *Score {
|
||||||
|
if !d.ExistsScore(addr) {
|
||||||
|
d.SetScore(addr, &Score{
|
||||||
|
FirstContacted: time.Now().UTC(),
|
||||||
|
UptimeTracker: &UptimeTracker{FirstSeen: time.Now().UTC()},
|
||||||
|
nextChallenge: rand.Intn(10) + 1,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return d.GetScore(addr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// evictPeer removes addr from directory atomically and returns a snapshot of
|
||||||
|
// remaining AddrInfos (for consensus voter selection).
|
||||||
|
func evictPeer(d *Directory, addr string, id pp.ID, proto protocol.ID) []pp.AddrInfo {
|
||||||
|
d.Streams.Delete(proto, &id)
|
||||||
|
d.DeleteAddr(addr)
|
||||||
|
voters := make([]pp.AddrInfo, 0, len(d.Addrs))
|
||||||
|
for _, ai := range d.GetAddrs() {
|
||||||
|
if ai.Info != nil {
|
||||||
|
voters = append(voters, *ai.Info)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.DeleteScore(addr)
|
||||||
|
return voters
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleSuggestions adds unknown suggested indexers to the directory.
|
||||||
|
func handleSuggestions(d *Directory, from string, suggestions []pp.AddrInfo) {
|
||||||
|
added := 0
|
||||||
|
for _, sug := range suggestions {
|
||||||
|
key := addrKey(sug)
|
||||||
|
if !d.ExistsAddr(key) {
|
||||||
|
cpy := sug
|
||||||
|
d.SetAddr(key, &cpy)
|
||||||
|
added++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if added > 0 {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
logger.Info().Int("added", added).Str("from", from).
|
||||||
|
Msg("added suggested indexers from heartbeat response")
|
||||||
|
d.NudgeIt()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendHeartbeat starts a goroutine that sends periodic heartbeats to peers.
|
||||||
|
// recordFn, when provided, is called on each tick and its output is embedded in
|
||||||
|
// the heartbeat as a fresh signed PeerRecord so the receiving indexer can
|
||||||
|
// republish it to the DHT without an extra round-trip.
|
||||||
|
// Pass no recordFn (or nil) for indexer→indexer / native heartbeats.
|
||||||
|
func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.Host, directory *Directory, interval time.Duration, maxPool int, recordFn ...func() json.RawMessage) {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
isIndexerHB := directory == Indexers
|
||||||
|
var recFn func() json.RawMessage
|
||||||
|
if len(recordFn) > 0 {
|
||||||
|
recFn = recordFn[0]
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
logger.Info().Str("proto", string(proto)).Int("peers", len(directory.Addrs)).Msg("heartbeat started")
|
||||||
|
t := time.NewTicker(interval)
|
||||||
|
defer t.Stop()
|
||||||
|
|
||||||
|
// peerEntry pairs addr key with AddrInfo so doTick can update score maps directly.
|
||||||
|
type peerEntry struct {
|
||||||
|
addr string
|
||||||
|
ai *pp.AddrInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
doTick := func() {
|
||||||
|
addrs := directory.GetAddrsStr()
|
||||||
|
need := maxPool - len(addrs)
|
||||||
|
if need < 0 {
|
||||||
|
need = 0
|
||||||
|
}
|
||||||
|
baseHB := Heartbeat{
|
||||||
|
Name: name,
|
||||||
|
PeerID: h.ID().String(),
|
||||||
|
Timestamp: time.Now().UTC().Unix(),
|
||||||
|
IndexersBinded: addrs,
|
||||||
|
Need: need,
|
||||||
|
}
|
||||||
|
if recFn != nil {
|
||||||
|
baseHB.Record = recFn()
|
||||||
|
}
|
||||||
|
// Determine the referent indexer: highest-scored one receives Referent=true
|
||||||
|
// so it stores us in its referencedNodes for distributed search.
|
||||||
|
var referentAddr string
|
||||||
|
if isIndexerHB {
|
||||||
|
var bestScore float64 = -1
|
||||||
|
for _, ai2 := range directory.GetAddrs() {
|
||||||
|
if s := directory.GetScore(ai2.Addr); s != nil && s.Score > bestScore {
|
||||||
|
bestScore = s.Score
|
||||||
|
referentAddr = ai2.Addr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, ai := range directory.GetAddrs() {
|
||||||
|
// Build per-peer heartbeat copy so challenge injection is peer-specific.
|
||||||
|
hb := baseHB
|
||||||
|
if isIndexerHB && referentAddr != "" && ai.Addr == referentAddr {
|
||||||
|
hb.Referent = true
|
||||||
|
}
|
||||||
|
// Ensure an IndexerScore entry exists for this peer.
|
||||||
|
var score *Score
|
||||||
|
if isIndexerHB {
|
||||||
|
score = ensureScore(directory, ai.Addr)
|
||||||
|
|
||||||
|
// Inject challenge batch if due (random 1-10 HBs between batches).
|
||||||
|
score.hbCount++
|
||||||
|
if score.hbCount >= score.nextChallenge {
|
||||||
|
// Ground truth: node's own PeerID — indexer MUST have us.
|
||||||
|
challenges := []string{h.ID().String()}
|
||||||
|
// Add up to 2 more known peers (other indexers) for richer data.
|
||||||
|
// Use the already-snapshotted entries to avoid re-locking.
|
||||||
|
for _, ai2 := range directory.GetAddrs() {
|
||||||
|
if ai2.Addr != ai.Addr && ai2.Info != nil {
|
||||||
|
challenges = append(challenges, ai2.Info.ID.String())
|
||||||
|
if len(challenges) >= 3 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hb.Challenges = challenges
|
||||||
|
score.hbCount = 0
|
||||||
|
score.nextChallenge = rand.Intn(10) + 1
|
||||||
|
score.challengeTotal++ // count own-PeerID challenge (ground truth)
|
||||||
|
score.dhtBatchCounter++
|
||||||
|
// DHT challenge every 5th batch: ask indexer to retrieve our own DID.
|
||||||
|
if score.dhtBatchCounter%5 == 0 {
|
||||||
|
var selfDID string
|
||||||
|
if len(baseHB.Record) > 0 {
|
||||||
|
var partial struct {
|
||||||
|
DID string `json:"did"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(baseHB.Record, &partial) == nil {
|
||||||
|
selfDID = partial.DID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if selfDID != "" {
|
||||||
|
hb.ChallengeDID = selfDID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, rtt, err := sendHeartbeat(ctx, h, proto, ai.Info, hb, directory.Streams, interval*time.Second)
|
||||||
|
if err != nil { // Heartbeat fails
|
||||||
|
fmt.Println("EERR", err)
|
||||||
|
HeartbeatFailure(h, proto, directory, ai.Addr, ai.Info, isIndexerHB, maxPool, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update IndexerScore — uptime recorded on any successful send,
|
||||||
|
// even if the indexer does not support bidirectional heartbeat (Fix 1).
|
||||||
|
if isIndexerHB && score != nil {
|
||||||
|
score.UptimeTracker.RecordHeartbeat()
|
||||||
|
|
||||||
|
maxRTT := BaseRoundTrip * 10
|
||||||
|
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
|
||||||
|
if latencyScore < 0 {
|
||||||
|
latencyScore = 0
|
||||||
|
}
|
||||||
|
if latencyScore > 1 {
|
||||||
|
latencyScore = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update fill / challenge fields only when the indexer responded.
|
||||||
|
if resp != nil {
|
||||||
|
// BornAt stability check.
|
||||||
|
if score.LastBornAt.IsZero() {
|
||||||
|
score.LastBornAt = resp.BornAt
|
||||||
|
} else if !resp.BornAt.IsZero() && !resp.BornAt.Equal(score.LastBornAt) {
|
||||||
|
score.bornAtChanges++
|
||||||
|
score.LastBornAt = resp.BornAt
|
||||||
|
logger.Warn().Str("peer", ai.Info.ID.String()).
|
||||||
|
Int("changes", score.bornAtChanges).
|
||||||
|
Msg("indexer BornAt changed — possible restart or impersonation")
|
||||||
|
}
|
||||||
|
score.LastFillRate = resp.FillRate
|
||||||
|
|
||||||
|
// Fill rate consistency: cross-check peerCount/maxNodes vs reported fillRate.
|
||||||
|
if resp.MaxNodes > 0 {
|
||||||
|
expected := float64(resp.PeerCount) / float64(resp.MaxNodes)
|
||||||
|
diff := expected - resp.FillRate
|
||||||
|
if diff < 0 {
|
||||||
|
diff = -diff
|
||||||
|
}
|
||||||
|
score.fillChecked++
|
||||||
|
if diff < 0.1 {
|
||||||
|
score.fillConsistent++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate challenge responses. Only own-PeerID counts as ground truth.
|
||||||
|
if len(hb.Challenges) > 0 && len(resp.Challenges) > 0 {
|
||||||
|
ownID := h.ID().String()
|
||||||
|
for _, ce := range resp.Challenges {
|
||||||
|
if ce.PeerID != ownID {
|
||||||
|
continue // informational only
|
||||||
|
}
|
||||||
|
recentEnough := !ce.LastSeen.IsZero() &&
|
||||||
|
time.Since(ce.LastSeen) < 2*RecommendedHeartbeatInterval
|
||||||
|
if ce.Found && recentEnough {
|
||||||
|
score.challengeCorrect++
|
||||||
|
}
|
||||||
|
logger.Info().Str("peer", ai.Info.ID.String()).
|
||||||
|
Bool("found", ce.Found).
|
||||||
|
Bool("recent", recentEnough).
|
||||||
|
Msg("own-PeerID challenge result")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DHT challenge result.
|
||||||
|
if hb.ChallengeDID != "" {
|
||||||
|
score.dhtChecked++
|
||||||
|
if resp.DHTFound {
|
||||||
|
score.dhtSuccess++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Refresh local witness cache for indirect probing on future failure.
|
||||||
|
for _, w := range resp.Witnesses {
|
||||||
|
score.UpdateWitnessPool(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Launch witness cross-check asynchronously (must not hold lock).
|
||||||
|
if len(resp.Witnesses) > 0 {
|
||||||
|
go queryWitnesses(h, ai.Info.ID.String(), resp.BornAt, resp.FillRate, resp.Witnesses, score)
|
||||||
|
} else if resp.MaxNodes > 0 {
|
||||||
|
// No witnesses offered. Valid if indexer only has us (PeerCount==1).
|
||||||
|
// Cross-check: FillRate should equal 1/MaxNodes within ±10%.
|
||||||
|
expected := 1.0 / float64(resp.MaxNodes)
|
||||||
|
diff := resp.FillRate - expected
|
||||||
|
if diff < 0 {
|
||||||
|
diff = -diff
|
||||||
|
}
|
||||||
|
score.witnessChecked++
|
||||||
|
if resp.PeerCount == 1 && diff < 0.1 {
|
||||||
|
score.witnessConsistent++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
score.Score = score.ComputeNodeSideScore(latencyScore)
|
||||||
|
age := score.UptimeTracker.Uptime()
|
||||||
|
minScore := dynamicMinScore(age)
|
||||||
|
// Fix 4: grace period — at least 2 full heartbeat cycles before ejecting.
|
||||||
|
isSeed := score.IsSeed
|
||||||
|
// Seeds are sticky: never evicted by score alone (SuggestMigrate handles it).
|
||||||
|
// Never eject the last indexer by score alone — we would lose all connectivity.
|
||||||
|
belowThreshold := score.Score < minScore &&
|
||||||
|
score.UptimeTracker.TotalOnline >= 2*RecommendedHeartbeatInterval &&
|
||||||
|
!isSeed &&
|
||||||
|
len(directory.Addrs) > 1
|
||||||
|
|
||||||
|
if belowThreshold {
|
||||||
|
logger.Info().Str("peer", ai.Info.ID.String()).
|
||||||
|
Float64("score", score.Score).Float64("min", minScore).
|
||||||
|
Msg("indexer score below threshold, removing from pool")
|
||||||
|
voters := evictPeer(directory, ai.Addr, ai.Info.ID, proto)
|
||||||
|
need := max(maxPool-len(voters), 1)
|
||||||
|
if len(voters) > 0 {
|
||||||
|
go TriggerConsensus(h, voters, need)
|
||||||
|
} else {
|
||||||
|
go replenishIndexersFromDHT(h, need)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accept suggestions from this indexer — add unknown ones to the directory.
|
||||||
|
if resp != nil && len(resp.Suggestions) > 0 {
|
||||||
|
handleSuggestions(directory, ai.Info.ID.String(), resp.Suggestions)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle SuggestMigrate: indexer is overloaded and wants us to move.
|
||||||
|
if resp != nil && resp.SuggestMigrate && isIndexerHB {
|
||||||
|
nonSeedCount := 0
|
||||||
|
for _, sc := range directory.GetScores() {
|
||||||
|
if !sc.IsSeed {
|
||||||
|
nonSeedCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if nonSeedCount >= conf.GetConfig().MinIndexer {
|
||||||
|
if isSeed {
|
||||||
|
// Seed has offloaded us: clear sticky flag, score eviction takes over.
|
||||||
|
score.IsSeed = false
|
||||||
|
logger.Info().Str("peer", ai.Info.ID.String()).
|
||||||
|
Msg("seed discharged via SuggestMigrate, de-stickied")
|
||||||
|
} else {
|
||||||
|
evictPeer(directory, ai.Addr, ai.Info.ID, proto)
|
||||||
|
logger.Info().Str("peer", ai.Info.ID.String()).Msg("accepted migration from overloaded indexer")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-t.C:
|
||||||
|
doTick()
|
||||||
|
case <-directory.Nudge:
|
||||||
|
if isIndexerHB {
|
||||||
|
logger.Info().Msg("nudge received, heartbeating new indexers immediately")
|
||||||
|
doTick()
|
||||||
|
}
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
||||||
|
addr string, info *pp.AddrInfo, isIndexerHB bool, maxPool int, err error) {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
logger.Err(err)
|
||||||
|
// Seeds are never evicted on heartbeat failure.
|
||||||
|
// Keeping them in the pool lets the regular 60-second ticker retry them
|
||||||
|
// at a natural cadence — no reconnect storm, no libp2p dial-backoff accumulation.
|
||||||
|
// A seed will self-heal once it comes back; DHT and inbound peers fill the gap.
|
||||||
|
if isIndexerHB {
|
||||||
|
if score := directory.GetScore(addr); score != nil {
|
||||||
|
if score.IsSeed {
|
||||||
|
logger.Warn().Str("peer", info.ID.String()).
|
||||||
|
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Indirect probe: query cached witnesses before declaring the indexer dead.
|
||||||
|
// If a witness confirms it is alive, the failure is a local asymmetric
|
||||||
|
// link — not the indexer. Skip eviction; next tick will retry directly.
|
||||||
|
if len(score.WitnessPool) > 0 {
|
||||||
|
pool := append([]WitnessCacheEntry(nil), score.WitnessPool...)
|
||||||
|
if IndirectProbeIndexer(h, info.ID.String(), pool) {
|
||||||
|
logger.Warn().Str("peer", info.ID.String()).
|
||||||
|
Msg("[indirect] witness confirms indexer alive — asymmetric link, skipping eviction " + err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info().Str("peer", info.ID.String()).Str("proto", string(proto)).
|
||||||
|
Msg("heartbeat failed, removing peer from pool : " + err.Error())
|
||||||
|
consensusVoters := evictPeer(directory, addr, info.ID, proto)
|
||||||
|
if isIndexerHB {
|
||||||
|
need := maxPool - len(consensusVoters)
|
||||||
|
if need < 1 {
|
||||||
|
need = 1
|
||||||
|
}
|
||||||
|
logger.Info().Int("remaining", len(consensusVoters)).Int("need", need).Msg("pool state after removal")
|
||||||
|
poolSize := len(directory.GetAddrs())
|
||||||
|
if poolSize == 0 {
|
||||||
|
// Pool is truly empty (no seeds configured or no seeds in pool).
|
||||||
|
// Start the backoff retry loop — it will re-add seeds and nudge
|
||||||
|
// only once a seed actually responds.
|
||||||
|
go retryUntilSeedResponds()
|
||||||
|
} else if len(consensusVoters) > 0 {
|
||||||
|
go TriggerConsensus(h, consensusVoters, need)
|
||||||
|
} else {
|
||||||
|
go replenishIndexersFromDHT(h, need)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
182
daemons/node/common/common_scoring.go
Normal file
182
daemons/node/common/common_scoring.go
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
cr "crypto/rand"
|
||||||
|
"io"
|
||||||
|
"net"
|
||||||
|
"slices"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/libp2p/go-libp2p/core/host"
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
)
|
||||||
|
|
||||||
|
const MaxExpectedMbps = 100.0
|
||||||
|
const MinPayloadChallenge = 512
|
||||||
|
const MaxPayloadChallenge = 2048
|
||||||
|
const BaseRoundTrip = 400 * time.Millisecond
|
||||||
|
|
||||||
|
type UptimeTracker struct {
|
||||||
|
FirstSeen time.Time
|
||||||
|
LastSeen time.Time
|
||||||
|
TotalOnline time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordHeartbeat accumulates online time gap-aware: only counts the interval if
|
||||||
|
// the gap since the last heartbeat is within 2× the recommended interval (i.e. no
|
||||||
|
// extended outage). Call this each time a heartbeat is successfully processed.
|
||||||
|
func (u *UptimeTracker) RecordHeartbeat() {
|
||||||
|
now := time.Now().UTC()
|
||||||
|
if !u.LastSeen.IsZero() {
|
||||||
|
gap := now.Sub(u.LastSeen)
|
||||||
|
if gap <= 2*RecommendedHeartbeatInterval {
|
||||||
|
u.TotalOnline += gap
|
||||||
|
}
|
||||||
|
}
|
||||||
|
u.LastSeen = now
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u *UptimeTracker) Uptime() time.Duration {
|
||||||
|
return time.Since(u.FirstSeen)
|
||||||
|
}
|
||||||
|
|
||||||
|
// UptimeRatio returns the fraction of tracked lifetime during which the peer was
|
||||||
|
// continuously online (gap ≤ 2×RecommendedHeartbeatInterval). Returns 0 before
|
||||||
|
// the first heartbeat interval has elapsed.
|
||||||
|
func (u *UptimeTracker) UptimeRatio() float64 {
|
||||||
|
total := time.Since(u.FirstSeen)
|
||||||
|
if total <= 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
ratio := float64(u.TotalOnline) / float64(total)
|
||||||
|
if ratio > 1 {
|
||||||
|
ratio = 1
|
||||||
|
}
|
||||||
|
return ratio
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u *UptimeTracker) IsEligible(min time.Duration) bool {
|
||||||
|
return u.Uptime() >= min
|
||||||
|
}
|
||||||
|
|
||||||
|
// getBandwidthChallengeRate opens a dedicated ProtocolBandwidthProbe stream to
|
||||||
|
// remotePeer, sends a random payload, reads the echo, and computes throughput
|
||||||
|
// and a latency score. Returns (ok, bpms, latencyScore, error).
|
||||||
|
// latencyScore is 1.0 when RTT is very fast and 0.0 when at or beyond maxRoundTrip.
|
||||||
|
// Using a separate stream avoids mixing binary data on the JSON heartbeat stream
|
||||||
|
// and ensures the echo handler is actually running on the remote side.
|
||||||
|
func getBandwidthChallengeRate(h host.Host, remotePeer pp.ID, payloadSize int) (bool, float64, float64, error) {
|
||||||
|
payload := make([]byte, payloadSize)
|
||||||
|
if _, err := cr.Read(payload); err != nil {
|
||||||
|
return false, 0, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
s, err := h.NewStream(ctx, remotePeer, ProtocolBandwidthProbe)
|
||||||
|
if err != nil {
|
||||||
|
return false, 0, 0, err
|
||||||
|
}
|
||||||
|
defer s.Reset()
|
||||||
|
s.SetDeadline(time.Now().Add(10 * time.Second))
|
||||||
|
start := time.Now()
|
||||||
|
if _, err = s.Write(payload); err != nil {
|
||||||
|
return false, 0, 0, err
|
||||||
|
}
|
||||||
|
s.CloseWrite()
|
||||||
|
// Half-close the write side so the handler's io.Copy sees EOF and stops.
|
||||||
|
// Read the echo.
|
||||||
|
response := make([]byte, payloadSize)
|
||||||
|
if _, err = io.ReadFull(s, response); err != nil {
|
||||||
|
return false, 0, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
duration := time.Since(start)
|
||||||
|
maxRoundTrip := BaseRoundTrip + (time.Duration(payloadSize) * (100 * time.Millisecond))
|
||||||
|
mbps := float64(payloadSize*8) / duration.Seconds() / 1e6
|
||||||
|
|
||||||
|
// latencyScore: 1.0 = instant, 0.0 = at maxRoundTrip or beyond.
|
||||||
|
latencyScore := 1.0 - float64(duration)/float64(maxRoundTrip)
|
||||||
|
if latencyScore < 0 {
|
||||||
|
latencyScore = 0
|
||||||
|
}
|
||||||
|
if latencyScore > 1 {
|
||||||
|
latencyScore = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if duration > maxRoundTrip || mbps < 5.0 {
|
||||||
|
return false, float64(mbps / MaxExpectedMbps), latencyScore, nil
|
||||||
|
}
|
||||||
|
return true, float64(mbps / MaxExpectedMbps), latencyScore, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getDiversityRate(h host.Host, peers []string) float64 {
|
||||||
|
peers, _ = checkPeers(h, peers)
|
||||||
|
diverse := []string{}
|
||||||
|
for _, p := range peers {
|
||||||
|
ip, err := ExtractIP(p)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
div := ip.Mask(net.CIDRMask(24, 32)).String()
|
||||||
|
if !slices.Contains(diverse, div) {
|
||||||
|
diverse = append(diverse, div)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(diverse) == 0 || len(peers) == 0 {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return float64(len(diverse)) / float64(len(peers))
|
||||||
|
}
|
||||||
|
|
||||||
|
// getOwnDiversityRate measures subnet /24 diversity of the indexer's own connected peers.
|
||||||
|
// This evaluates the indexer's network position rather than the connecting node's topology.
|
||||||
|
func getOwnDiversityRate(h host.Host) float64 {
|
||||||
|
diverse := map[string]struct{}{}
|
||||||
|
total := 0
|
||||||
|
for _, pid := range h.Network().Peers() {
|
||||||
|
for _, maddr := range h.Peerstore().Addrs(pid) {
|
||||||
|
total++
|
||||||
|
ip, err := ExtractIP(maddr.String())
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
diverse[ip.Mask(net.CIDRMask(24, 32)).String()] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if total == 0 {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return float64(len(diverse)) / float64(total)
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkPeers(h host.Host, peers []string) ([]string, []string) {
|
||||||
|
concretePeer := []string{}
|
||||||
|
ips := []string{}
|
||||||
|
for _, p := range peers {
|
||||||
|
ad, err := pp.AddrInfoFromString(p)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if PeerIsAlive(h, *ad) {
|
||||||
|
concretePeer = append(concretePeer, p)
|
||||||
|
if ip, err := ExtractIP(p); err == nil {
|
||||||
|
ips = append(ips, ip.Mask(net.CIDRMask(24, 32)).String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return concretePeer, ips
|
||||||
|
}
|
||||||
|
|
||||||
|
// dynamicMinScore returns the minimum acceptable score for a peer, starting
|
||||||
|
// permissive (20%) for brand-new peers and hardening linearly to 80% over 24h.
|
||||||
|
// This prevents ejecting newcomers in fresh networks while filtering parasites.
|
||||||
|
func dynamicMinScore(age time.Duration) float64 {
|
||||||
|
hours := age.Hours()
|
||||||
|
score := 20.0 + 60.0*(hours/24.0)
|
||||||
|
if score > 80.0 {
|
||||||
|
score = 80.0
|
||||||
|
}
|
||||||
|
return score
|
||||||
|
}
|
||||||
302
daemons/node/common/common_service.go
Normal file
302
daemons/node/common/common_service.go
Normal file
@@ -0,0 +1,302 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"math/rand"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
|
"github.com/libp2p/go-libp2p/core/host"
|
||||||
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
"github.com/libp2p/go-libp2p/core/protocol"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LongLivedStreamRecordedService layers per-protocol, per-peer stream records
// on top of a LongLivedPubSubService. Records are kept in StreamRecords under
// StreamMU and pruned by a periodic GC; optional hook functions let the owner
// customize admission, validation, responses, and eviction side effects.
type LongLivedStreamRecordedService[T interface{}] struct {
	*LongLivedPubSubService
	// StreamRecords maps protocol → remote peer → its live stream record.
	StreamRecords map[protocol.ID]map[pp.ID]*StreamRecord[T]
	// StreamMU guards all access to StreamRecords.
	StreamMU sync.RWMutex
	// maxNodesConn caps how many nodes may be connected (exposed via MaxNodesConn).
	maxNodesConn int
	// AllowInbound, when set, is called once at stream open before any heartbeat
	// is decoded. remotePeer is the connecting peer; isNew is true when no
	// StreamRecord exists yet (first-ever connection). Return a non-nil error
	// to immediately reset the stream and refuse the peer.
	AllowInbound func(remotePeer pp.ID, isNew bool) error
	// ValidateHeartbeat, when set, is called inside the heartbeat loop after
	// each successful CheckHeartbeat decode. Return a non-nil error to reset
	// the stream and terminate the session.
	ValidateHeartbeat func(remotePeer pp.ID) error
	// AfterHeartbeat is called after each successful heartbeat with the full
	// decoded Heartbeat so the hook can use the fresh embedded PeerRecord.
	AfterHeartbeat func(hb *Heartbeat)
	// AfterDelete is called after gc() evicts an expired peer, outside the lock.
	// name and did may be empty if the HeartbeatStream had no metadata.
	AfterDelete func(pid pp.ID, name string, did string)
	// BuildHeartbeatResponse, when set, is called after each successfully decoded
	// heartbeat to build the response sent back to the node.
	// remotePeer is the peer that sent the heartbeat (used for offload routing).
	// need is how many more indexers the node wants (from hb.Need).
	// referent is true when the node designated this indexer as its search referent.
	BuildHeartbeatResponse func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool) *HeartbeatResponse
}
|
||||||
|
|
||||||
|
// MaxNodesConn returns the configured upper bound on concurrently connected nodes.
func (ix *LongLivedStreamRecordedService[T]) MaxNodesConn() int {
	return ix.maxNodesConn
}
|
||||||
|
|
||||||
|
func NewStreamRecordedService[T interface{}](h host.Host, maxNodesConn int) *LongLivedStreamRecordedService[T] {
|
||||||
|
service := &LongLivedStreamRecordedService[T]{
|
||||||
|
LongLivedPubSubService: NewLongLivedPubSubService(h),
|
||||||
|
StreamRecords: map[protocol.ID]map[pp.ID]*StreamRecord[T]{},
|
||||||
|
maxNodesConn: maxNodesConn,
|
||||||
|
}
|
||||||
|
go service.StartGC(30 * time.Second)
|
||||||
|
// Garbage collection is needed on every Map of Long-Lived Stream... it may be a top level redesigned
|
||||||
|
go service.Snapshot(1 * time.Hour)
|
||||||
|
return service
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ix *LongLivedStreamRecordedService[T]) StartGC(interval time.Duration) {
|
||||||
|
go func() {
|
||||||
|
t := time.NewTicker(interval)
|
||||||
|
defer t.Stop()
|
||||||
|
for range t.C {
|
||||||
|
fmt.Println("ACTUALLY RELATED INDEXERS", Indexers.Addrs, len(Indexers.Addrs))
|
||||||
|
ix.gc()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ix *LongLivedStreamRecordedService[T]) gc() {
|
||||||
|
ix.StreamMU.Lock()
|
||||||
|
now := time.Now().UTC()
|
||||||
|
if ix.StreamRecords[ProtocolHeartbeat] == nil {
|
||||||
|
ix.StreamRecords[ProtocolHeartbeat] = map[pp.ID]*StreamRecord[T]{}
|
||||||
|
ix.StreamMU.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
streams := ix.StreamRecords[ProtocolHeartbeat]
|
||||||
|
|
||||||
|
type gcEntry struct {
|
||||||
|
pid pp.ID
|
||||||
|
name string
|
||||||
|
did string
|
||||||
|
}
|
||||||
|
var evicted []gcEntry
|
||||||
|
for pid, rec := range streams {
|
||||||
|
if now.After(rec.HeartbeatStream.Expiry) || now.Sub(rec.HeartbeatStream.UptimeTracker.LastSeen) > 2*rec.HeartbeatStream.Expiry.Sub(now) {
|
||||||
|
name, did := "", ""
|
||||||
|
if rec.HeartbeatStream != nil {
|
||||||
|
name = rec.HeartbeatStream.Name
|
||||||
|
did = rec.HeartbeatStream.DID
|
||||||
|
}
|
||||||
|
evicted = append(evicted, gcEntry{pid, name, did})
|
||||||
|
for _, sstreams := range ix.StreamRecords {
|
||||||
|
if sstreams[pid] != nil {
|
||||||
|
delete(sstreams, pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ix.StreamMU.Unlock()
|
||||||
|
|
||||||
|
if ix.AfterDelete != nil {
|
||||||
|
for _, e := range evicted {
|
||||||
|
ix.AfterDelete(e.pid, e.name, e.did)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ix *LongLivedStreamRecordedService[T]) Snapshot(interval time.Duration) {
|
||||||
|
go func() {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
t := time.NewTicker(interval)
|
||||||
|
defer t.Stop()
|
||||||
|
for range t.C {
|
||||||
|
infos := ix.snapshot()
|
||||||
|
for _, inf := range infos {
|
||||||
|
logger.Info().Msg(" -> " + inf.DID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------- Snapshot / Query --------
|
||||||
|
func (ix *LongLivedStreamRecordedService[T]) snapshot() []*StreamRecord[T] {
|
||||||
|
ix.StreamMU.Lock()
|
||||||
|
defer ix.StreamMU.Unlock()
|
||||||
|
|
||||||
|
out := make([]*StreamRecord[T], 0, len(ix.StreamRecords))
|
||||||
|
for _, streams := range ix.StreamRecords {
|
||||||
|
for _, stream := range streams {
|
||||||
|
out = append(out, stream)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// HandleHeartbeat is the long-lived libp2p stream handler for the heartbeat
// protocol. One goroutine per inbound stream runs this loop until the stream
// dies or a policy hook rejects the peer.
//
// Per iteration it: (1) decodes and scores one heartbeat via CheckHeartbeat,
// (2) applies the per-tick ValidateHeartbeat policy, (3) inserts or refreshes
// the peer's StreamRecord under StreamMU, (4) enriches hb.DID, (5) notifies
// AfterHeartbeat, and (6) optionally writes a HeartbeatResponse back on the
// same stream (bidirectional heartbeat).
func (ix *LongLivedStreamRecordedService[T]) HandleHeartbeat(s network.Stream) {
	logger := oclib.GetLogger()
	defer s.Close()

	// AllowInbound: burst guard + ban check before the first byte is read.
	// `!exists` tells the hook whether this is a NEW peer (subject to the
	// connection-rate window) or a reconnecting known one.
	if ix.AllowInbound != nil {
		remotePeer := s.Conn().RemotePeer()
		ix.StreamMU.RLock()
		_, exists := ix.StreamRecords[ProtocolHeartbeat][remotePeer]
		ix.StreamMU.RUnlock()
		if err := ix.AllowInbound(remotePeer, !exists); err != nil {
			logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("inbound connection refused")
			s.Reset()
			return
		}
	}

	// A single decoder persists across iterations: heartbeats arrive as a
	// JSON sequence on one long-lived stream.
	dec := json.NewDecoder(s)
	for {
		// Snapshot the heartbeat record map under the lock; CheckHeartbeat
		// receives a shallow copy keyed to the interface type so it never
		// mutates our map.
		ix.StreamMU.Lock()
		if ix.StreamRecords[ProtocolHeartbeat] == nil {
			ix.StreamRecords[ProtocolHeartbeat] = map[pp.ID]*StreamRecord[T]{}
		}
		streams := ix.StreamRecords[ProtocolHeartbeat]
		streamsAnonym := map[pp.ID]HeartBeatStreamed{}
		for k, v := range streams {
			streamsAnonym[k] = v
		}
		ix.StreamMU.Unlock()
		pid, hb, err := CheckHeartbeat(ix.Host, s, dec, streamsAnonym, &ix.StreamMU, ix.maxNodesConn)
		if err != nil {
			// Stream-level errors (EOF, reset, closed) mean the connection is gone
			// — exit so the goroutine doesn't spin forever on a dead stream.
			// Metric/policy errors (score too low, too many connections) are transient
			// — those are also stream-terminal since the stream carries one session.
			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
				strings.Contains(err.Error(), "reset") ||
				strings.Contains(err.Error(), "closed") ||
				strings.Contains(err.Error(), "too many connections") {
				logger.Info().Err(err).Msg("heartbeat stream terminated, closing handler")
				return
			}
			logger.Warn().Err(err).Msg("heartbeat check failed, retrying on same stream")
			continue
		}
		// ValidateHeartbeat: per-tick behavioral check (rate limiting, bans).
		if ix.ValidateHeartbeat != nil {
			if err := ix.ValidateHeartbeat(*pid); err != nil {
				logger.Warn().Err(err).Str("peer", pid.String()).Msg("heartbeat rejected, closing stream")
				s.Reset()
				return
			}
		}
		ix.StreamMU.Lock()
		// If the record was already seen, refresh it in place; `streams`
		// still points at the live map captured above, so mutations here
		// are visible service-wide.
		if rec, ok := streams[*pid]; ok {
			rec.DID = hb.DID
			// Preserve the existing UptimeTracker so TotalOnline accumulates correctly.
			// hb.Stream is a fresh Stream with no UptimeTracker; carry the old one over.
			oldTracker := rec.GetUptimeTracker()
			rec.HeartbeatStream = hb.Stream
			if oldTracker != nil {
				rec.HeartbeatStream.UptimeTracker = oldTracker
			} else {
				rec.HeartbeatStream.UptimeTracker = &UptimeTracker{FirstSeen: time.Now().UTC()}
			}
			rec.HeartbeatStream.UptimeTracker.RecordHeartbeat()
			rec.LastScore = hb.Score
			logger.Info().Msg("A new node is updated : " + pid.String())
		} else {
			// First heartbeat from this peer: create the record with a
			// fresh tracker seeded at "now".
			tracker := &UptimeTracker{FirstSeen: time.Now().UTC()}
			tracker.RecordHeartbeat()
			hb.Stream.UptimeTracker = tracker
			streams[*pid] = &StreamRecord[T]{
				DID:             hb.DID,
				HeartbeatStream: hb.Stream,
				LastScore:       hb.Score,
			}
			logger.Info().Msg("A new node is subscribed : " + pid.String())
		}
		ix.StreamMU.Unlock()
		// Enrich hb.DID before calling the hook: nodes never set hb.DID directly;
		// extract it from the embedded signed PeerRecord if available, then fall
		// back to the DID stored by handleNodePublish in the stream record.
		if hb.DID == "" && len(hb.Record) > 0 {
			var partial struct {
				DID string `json:"did"`
			}
			if json.Unmarshal(hb.Record, &partial) == nil && partial.DID != "" {
				hb.DID = partial.DID
			}
		}
		if hb.DID == "" {
			ix.StreamMU.RLock()
			if rec, ok := streams[*pid]; ok {
				hb.DID = rec.DID
			}
			ix.StreamMU.RUnlock()
		}
		if ix.AfterHeartbeat != nil && hb.DID != "" {
			ix.AfterHeartbeat(hb)
		}
		// Send response back to the node (bidirectional heartbeat).
		// Encode errors are deliberately ignored: a failed write surfaces
		// as a decode error on the next loop iteration.
		if ix.BuildHeartbeatResponse != nil {
			if resp := ix.BuildHeartbeatResponse(s.Conn().RemotePeer(), hb.Need, hb.Challenges, hb.ChallengeDID, hb.Referent); resp != nil {
				s.SetWriteDeadline(time.Now().Add(3 * time.Second))
				json.NewEncoder(s).Encode(resp)
				s.SetWriteDeadline(time.Time{})
			}
		}
	}
}
|
||||||
|
|
||||||
|
// CheckHeartbeat decodes one heartbeat from dec, scores the sending peer, and
// returns its PeerID together with the enriched Heartbeat. It rejects when
// the host already holds maxNodes connections, when the PeerID is malformed,
// or when the computed score falls below the age-dependent minimum.
//
// On success hb.Stream is populated with a 2-minute-expiry Stream wrapping s
// (the long-lived bidirectional heartbeat). Decode errors are returned
// verbatim so the caller can distinguish stream death from policy rejection.
func CheckHeartbeat(h host.Host, s network.Stream, dec *json.Decoder, streams map[pp.ID]HeartBeatStreamed, lock *sync.RWMutex, maxNodes int) (*pp.ID, *Heartbeat, error) {
	// Capacity gate first: refuse before reading anything.
	if len(h.Network().Peers()) >= maxNodes {
		return nil, nil, fmt.Errorf("too many connections, try another indexer")
	}
	var hb Heartbeat
	if err := dec.Decode(&hb); err != nil {
		return nil, nil, err
	}
	// Bandwidth/latency challenge with a randomized payload size so peers
	// cannot tune responses to a fixed probe.
	// NOTE(review): getBandwidthChallengeRate's first/last return values are
	// discarded here — presumably raw bytes and an error; confirm at its
	// definition.
	_, bpms, latencyScore, _ := getBandwidthChallengeRate(h, s.Conn().RemotePeer(), MinPayloadChallenge+int(rand.Float64()*(MaxPayloadChallenge-MinPayloadChallenge)))
	{
		pid, err := pp.Decode(hb.PeerID)
		if err != nil {
			return nil, nil, err
		}
		// Pull uptime history (if any) for this peer under the shared lock;
		// brand-new peers keep ratio 0 and age 0.
		uptimeRatio := float64(0)
		age := time.Duration(0)
		lock.Lock()
		if rec, ok := streams[pid]; ok && rec.GetUptimeTracker() != nil {
			uptimeRatio = rec.GetUptimeTracker().UptimeRatio()
			age = rec.GetUptimeTracker().Uptime()
		}
		lock.Unlock()
		// E: measure the indexer's own subnet diversity, not the node's view.
		diversity := getOwnDiversityRate(h)
		// fillRate: fraction of indexer capacity used — higher = more peers trust this indexer.
		fillRate := 0.0
		if maxNodes > 0 {
			fillRate = float64(len(h.Network().Peers())) / float64(maxNodes)
			if fillRate > 1 {
				fillRate = 1
			}
		}
		hb.ComputeIndexerScore(uptimeRatio, bpms, diversity, latencyScore, fillRate)
		// B: dynamic minScore — starts at 20% for brand-new peers, ramps to 80% at 24h.
		minScore := dynamicMinScore(age)
		if hb.Score < minScore {
			return nil, nil, errors.New("not enough trusting value")
		}
		hb.Stream = &Stream{
			Name:   hb.Name,
			DID:    hb.DID,
			Stream: s,
			Expiry: time.Now().UTC().Add(2 * time.Minute),
		} // here is the long-lived bidirectional heartbeat.
		// err is necessarily nil here (checked after pp.Decode above).
		return &pid, &hb, err
	}
}
|
||||||
File diff suppressed because it is too large
Load Diff
199
daemons/node/common/consensus.go
Normal file
199
daemons/node/common/consensus.go
Normal file
@@ -0,0 +1,199 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"sort"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
|
"github.com/libp2p/go-libp2p/core/host"
|
||||||
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProtocolIndexerCandidates is opened by a node toward its remaining indexers
// to request candidate replacement indexers after an ejection event.
const ProtocolIndexerCandidates = "/opencloud/indexer/candidates/1.0"

// IndexerCandidatesRequest is sent by a node to one of its indexers.
// Count is how many candidates are needed.
type IndexerCandidatesRequest struct {
	// Count is the number of replacement candidates the node wants;
	// TriggerConsensus asks for need+2 to leave headroom for rejects.
	Count int `json:"count"`
}

// IndexerCandidatesResponse carries a random sample of known indexers from
// the responding indexer's DHT cache.
type IndexerCandidatesResponse struct {
	// Candidates may overlap with the requester's current pool; callers
	// must deduplicate (see TriggerConsensus).
	Candidates []pp.AddrInfo `json:"candidates"`
}
|
||||||
|
|
||||||
|
// TriggerConsensus asks each remaining indexer for a random pool of candidates,
// scores them asynchronously via a one-shot probe heartbeat, and admits the
// best ones to StaticIndexers. Falls back to DHT replenishment for any gap.
//
// Must be called in a goroutine — it blocks until all probes have returned
// (or timed out), which can take up to ~10s.
func TriggerConsensus(h host.Host, remaining []pp.AddrInfo, need int) {
	if need <= 0 || len(remaining) == 0 {
		return
	}
	logger := oclib.GetLogger()
	logger.Info().Int("voters", len(remaining)).Int("need", need).
		Msg("[consensus] starting indexer candidate consensus")

	// Phase 1 — collect candidates from all remaining indexers in parallel.
	// Each goroutine ALWAYS sends exactly one result (possibly empty), so the
	// fan-in loop below can receive exactly len(remaining) times.
	type collectResult struct{ candidates []pp.AddrInfo }
	collectCh := make(chan collectResult, len(remaining))
	for _, ai := range remaining {
		go func(ai pp.AddrInfo) {
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer cancel()
			s, err := h.NewStream(ctx, ai.ID, ProtocolIndexerCandidates)
			if err != nil {
				collectCh <- collectResult{}
				return
			}
			defer s.Close()
			// Hard deadline on the whole request/response exchange.
			s.SetDeadline(time.Now().Add(5 * time.Second))
			// Ask for a couple of extras so low scorers can be discarded.
			if err := json.NewEncoder(s).Encode(IndexerCandidatesRequest{Count: need + 2}); err != nil {
				collectCh <- collectResult{}
				return
			}
			var resp IndexerCandidatesResponse
			if err := json.NewDecoder(s).Decode(&resp); err != nil {
				collectCh <- collectResult{}
				return
			}
			collectCh <- collectResult{candidates: resp.Candidates}
		}(ai)
	}

	// Merge and deduplicate, excluding indexers already in the pool.
	seen := map[pp.ID]struct{}{}
	for _, ai := range Indexers.GetAddrIDs() {
		seen[ai] = struct{}{}

	}
	var candidates []pp.AddrInfo
	for range remaining {
		r := <-collectCh
		for _, ai := range r.candidates {
			if _, dup := seen[ai.ID]; !dup {
				seen[ai.ID] = struct{}{}
				candidates = append(candidates, ai)
			}
		}
	}

	if len(candidates) == 0 {
		logger.Info().Msg("[consensus] no candidates from voters, falling back to DHT")
		replenishIndexersFromDHT(h, need)
		return
	}
	logger.Info().Int("candidates", len(candidates)).Msg("[consensus] scoring candidates")

	// Phase 2 — score all candidates in parallel via a one-shot probe heartbeat.
	// Same one-send-per-goroutine invariant as phase 1.
	type scoreResult struct {
		ai    pp.AddrInfo
		score float64
	}
	scoreCh := make(chan scoreResult, len(candidates))
	for _, ai := range candidates {
		go func(ai pp.AddrInfo) {
			resp, rtt, err := probeIndexer(h, ai)
			if err != nil {
				// Unreachable candidates score 0 and sink to the bottom.
				scoreCh <- scoreResult{ai: ai, score: 0}
				return
			}
			scoreCh <- scoreResult{ai: ai, score: quickScore(resp, rtt)}
		}(ai)
	}

	results := make([]scoreResult, 0, len(candidates))
	for range candidates {
		results = append(results, <-scoreCh)
	}

	// Sort descending by quick score, admit top `need` above the minimum bar.
	sort.Slice(results, func(i, j int) bool { return results[i].score > results[j].score })
	minQ := dynamicMinScore(0) // fresh peer: threshold starts at 20

	admitted := 0
	for _, res := range results {
		if admitted >= need {
			break
		}
		if res.score < minQ {
			break // sorted desc: everything after is worse
		}
		key := addrKey(res.ai)
		if Indexers.ExistsAddr(key) {
			continue // already in pool (race with heartbeat path)
		}
		// Copy the loop value before taking its address.
		cpy := res.ai
		Indexers.SetAddr(key, &cpy)
		admitted++
	}

	if admitted > 0 {
		logger.Info().Int("admitted", admitted).Msg("[consensus] candidates admitted to pool")
		Indexers.NudgeIt()
	}

	// Fill any remaining gap with DHT discovery.
	if gap := need - admitted; gap > 0 {
		logger.Info().Int("gap", gap).Msg("[consensus] gap after consensus, falling back to DHT")
		replenishIndexersFromDHT(h, gap)
	}
}
|
||||||
|
|
||||||
|
// probeIndexer dials the candidate, sends one lightweight heartbeat, and
// returns the HeartbeatResponse (nil if the indexer doesn't support it) and RTT.
//
// A decode failure after a successful send deliberately returns a nil
// response with a NIL error: the successful connection itself is treated as
// the signal (legacy indexers never answer). Only connect/open/send failures
// return a non-nil error.
func probeIndexer(h host.Host, ai pp.AddrInfo) (*HeartbeatResponse, time.Duration, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second)
	defer cancel()
	// Ensure a live connection before opening the stream.
	if h.Network().Connectedness(ai.ID) != network.Connected {
		if err := h.Connect(ctx, ai); err != nil {
			return nil, 0, err
		}
	}
	s, err := h.NewStream(ctx, ai.ID, ProtocolHeartbeat)
	if err != nil {
		return nil, 0, err
	}
	defer s.Close()

	// Minimal heartbeat: just identity + timestamp, no record payload.
	hb := Heartbeat{PeerID: h.ID().String(), Timestamp: time.Now().UTC().Unix()}
	s.SetWriteDeadline(time.Now().Add(3 * time.Second))
	if err := json.NewEncoder(s).Encode(hb); err != nil {
		return nil, 0, err
	}
	s.SetWriteDeadline(time.Time{})

	// RTT is measured from the end of the send to the end of the decode.
	sentAt := time.Now()
	s.SetReadDeadline(time.Now().Add(5 * time.Second))
	var resp HeartbeatResponse
	if err := json.NewDecoder(s).Decode(&resp); err != nil {
		// Indexer connected but no response: connection itself is the signal.
		return nil, time.Since(sentAt), nil
	}
	return &resp, time.Since(sentAt), nil
}
|
||||||
|
|
||||||
|
// quickScore computes a lightweight score [0,100] from a probe result.
|
||||||
|
// Uses only fill rate (inverse) and latency — the two signals available
|
||||||
|
// without a full heartbeat history.
|
||||||
|
func quickScore(resp *HeartbeatResponse, rtt time.Duration) float64 {
|
||||||
|
maxRTT := BaseRoundTrip * 10
|
||||||
|
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
|
||||||
|
if latencyScore < 0 {
|
||||||
|
latencyScore = 0
|
||||||
|
}
|
||||||
|
if resp == nil {
|
||||||
|
// Connection worked but no response (old indexer): moderate score.
|
||||||
|
return latencyScore * 50
|
||||||
|
}
|
||||||
|
fillScore := 1.0 - resp.FillRate // prefer less-loaded indexers
|
||||||
|
return (0.5*latencyScore + 0.5*fillScore) * 100
|
||||||
|
}
|
||||||
219
daemons/node/common/dht_discovery.go
Normal file
219
daemons/node/common/dht_discovery.go
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
	"context"
	"math/rand"
	"sort"
	"strings"
	"time"

	oclib "cloud.o-forge.io/core/oc-lib"
	"github.com/ipfs/go-cid"
	dht "github.com/libp2p/go-libp2p-kad-dht"
	"github.com/libp2p/go-libp2p/core/host"
	pp "github.com/libp2p/go-libp2p/core/peer"
	ma "github.com/multiformats/go-multiaddr"
	mh "github.com/multiformats/go-multihash"
)
|
||||||
|
|
||||||
|
// FilterLoopbackAddrs strips loopback (127.x, ::1) and unspecified addresses
|
||||||
|
// from an AddrInfo so we never hand peers an address they cannot dial externally.
|
||||||
|
func FilterLoopbackAddrs(ai pp.AddrInfo) pp.AddrInfo {
|
||||||
|
filtered := make([]ma.Multiaddr, 0, len(ai.Addrs))
|
||||||
|
for _, addr := range ai.Addrs {
|
||||||
|
ip, err := ExtractIP(addr.String())
|
||||||
|
if err != nil || ip.IsLoopback() || ip.IsUnspecified() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, addr)
|
||||||
|
}
|
||||||
|
return pp.AddrInfo{ID: ai.ID, Addrs: filtered}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecommendedHeartbeatInterval is the target period between heartbeat ticks.
// Indexers use this as the DHT Provide refresh interval.
const RecommendedHeartbeatInterval = 60 * time.Second

// discoveryDHT is the DHT instance used for indexer discovery.
// Set by SetDiscoveryDHT once the indexer service initialises its DHT.
// NOTE(review): read by replenishIndexersFromDHT without synchronization;
// safe only if SetDiscoveryDHT runs before any concurrent reader — confirm
// the init ordering at the call sites.
var discoveryDHT *dht.IpfsDHT
|
||||||
|
// SetDiscoveryDHT stores the DHT instance used by replenishIndexersFromDHT.
// Called by NewIndexerService once the DHT is ready.
// NOTE(review): unsynchronized write to package-level state — callers must
// invoke this during startup, before discovery goroutines read discoveryDHT.
func SetDiscoveryDHT(d *dht.IpfsDHT) {
	discoveryDHT = d
}
|
||||||
|
|
||||||
|
// initNodeDHT creates a lightweight DHT client for pure nodes (no IndexerService).
// Uses the seed indexers as bootstrap peers. Called lazily by ConnectToIndexers
// when discoveryDHT is still nil after the initial warm-up delay.
//
// Bootstrap failure is logged but NOT fatal: the client is still registered
// via SetDiscoveryDHT and may converge later as connections form.
func initNodeDHT(h host.Host, seeds []Entry) {
	logger := oclib.GetLogger()
	bootstrapPeers := []pp.AddrInfo{}
	for _, s := range seeds {
		bootstrapPeers = append(bootstrapPeers, *s.Info)
	}
	// Client mode: this node queries the DHT but never serves records.
	d, err := dht.New(context.Background(), h,
		dht.Mode(dht.ModeClient),
		dht.ProtocolPrefix("oc"),
		dht.BootstrapPeers(bootstrapPeers...),
	)
	if err != nil {
		logger.Warn().Err(err).Msg("[dht] node DHT client init failed")
		return
	}
	// Publish the client for replenishIndexersFromDHT before bootstrapping.
	SetDiscoveryDHT(d)
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	if err := d.Bootstrap(ctx); err != nil {
		logger.Warn().Err(err).Msg("[dht] node DHT client bootstrap failed")
	}
	logger.Info().Msg("[dht] node DHT client ready")
}
|
||||||
|
|
||||||
|
// IndexerCID returns the well-known CID under which all indexers advertise.
|
||||||
|
func IndexerCID() cid.Cid {
|
||||||
|
h, _ := mh.Sum([]byte("/opencloud/indexers"), mh.SHA2_256, -1)
|
||||||
|
return cid.NewCidV1(cid.Raw, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiscoverIndexersFromDHT uses the DHT to find up to count indexers advertising
// under the well-known key. Excludes self. Resolves addresses when the provider
// record carries none.
//
// Bounded by a 10s overall deadline; asks the DHT for count*2 providers to
// leave slack for self/duplicate/loopback-only records that get filtered out.
func DiscoverIndexersFromDHT(h host.Host, d *dht.IpfsDHT, count int) []pp.AddrInfo {
	logger := oclib.GetLogger()
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	c := IndexerCID()
	ch := d.FindProvidersAsync(ctx, c, count*2)
	seen := map[pp.ID]struct{}{}
	var results []pp.AddrInfo
	for ai := range ch {
		// Never return ourselves.
		if ai.ID == h.ID() {
			continue
		}
		if _, dup := seen[ai.ID]; dup {
			continue
		}
		seen[ai.ID] = struct{}{}
		// Provider records sometimes carry no addresses; do a targeted
		// peer lookup to resolve them before filtering.
		if len(ai.Addrs) == 0 {
			resolved, err := d.FindPeer(ctx, ai.ID)
			if err != nil {
				logger.Warn().Str("peer", ai.ID.String()).Msg("[dht] no addrs and FindPeer failed, skipping")
				continue
			}
			ai = resolved
		}
		// Drop loopback/unspecified addresses; skip peers left with none.
		ai = FilterLoopbackAddrs(ai)
		if len(ai.Addrs) == 0 {
			continue
		}
		results = append(results, ai)
		if len(results) >= count {
			break
		}
	}
	logger.Info().Int("found", len(results)).Msg("[dht] indexer discovery complete")
	return results
}
|
||||||
|
|
||||||
|
// SelectByFillRate picks up to want providers using fill-rate weighted random
|
||||||
|
// selection w(F) = F*(1-F) — peaks at F=0.5, prefers less-loaded indexers.
|
||||||
|
// Providers with unknown fill rate receive F=0.5 (neutral prior).
|
||||||
|
// Enforces subnet /24 diversity: at most one indexer per /24.
|
||||||
|
func SelectByFillRate(providers []pp.AddrInfo, fillRates map[pp.ID]float64, want int) []pp.AddrInfo {
|
||||||
|
if len(providers) == 0 || want <= 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
type weighted struct {
|
||||||
|
ai pp.AddrInfo
|
||||||
|
weight float64
|
||||||
|
}
|
||||||
|
ws := make([]weighted, 0, len(providers))
|
||||||
|
for _, ai := range providers {
|
||||||
|
f, ok := fillRates[ai.ID]
|
||||||
|
if !ok {
|
||||||
|
f = 0.5
|
||||||
|
}
|
||||||
|
ws = append(ws, weighted{ai: ai, weight: f * (1 - f)})
|
||||||
|
}
|
||||||
|
// Shuffle first for fairness among equal-weight peers.
|
||||||
|
rand.Shuffle(len(ws), func(i, j int) { ws[i], ws[j] = ws[j], ws[i] })
|
||||||
|
// Sort descending by weight (simple insertion sort — small N).
|
||||||
|
for i := 1; i < len(ws); i++ {
|
||||||
|
for j := i; j > 0 && ws[j].weight > ws[j-1].weight; j-- {
|
||||||
|
ws[j], ws[j-1] = ws[j-1], ws[j]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
subnets := map[string]struct{}{}
|
||||||
|
var selected []pp.AddrInfo
|
||||||
|
for _, w := range ws {
|
||||||
|
if len(selected) >= want {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
subnet := subnetOf(w.ai)
|
||||||
|
if subnet != "" {
|
||||||
|
if _, dup := subnets[subnet]; dup {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
subnets[subnet] = struct{}{}
|
||||||
|
}
|
||||||
|
selected = append(selected, w.ai)
|
||||||
|
}
|
||||||
|
return selected
|
||||||
|
}
|
||||||
|
|
||||||
|
// subnetOf returns the /24 subnet string for the first non-loopback address of ai.
|
||||||
|
func subnetOf(ai pp.AddrInfo) string {
|
||||||
|
for _, ma := range ai.Addrs {
|
||||||
|
ip, err := ExtractIP(ma.String())
|
||||||
|
if err != nil || ip.IsLoopback() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.Split(ip.String(), ".")
|
||||||
|
if len(parts) >= 3 {
|
||||||
|
return parts[0] + "." + parts[1] + "." + parts[2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// replenishIndexersFromDHT is called when an indexer heartbeat fails and more
|
||||||
|
// indexers are needed. Queries the DHT and adds fresh entries to StaticIndexers.
|
||||||
|
func replenishIndexersFromDHT(h host.Host, need int) {
|
||||||
|
if need <= 0 || discoveryDHT == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
logger.Info().Int("need", need).Msg("[dht] replenishing indexer pool from DHT")
|
||||||
|
|
||||||
|
providers := DiscoverIndexersFromDHT(h, discoveryDHT, need*3)
|
||||||
|
selected := SelectByFillRate(providers, nil, need)
|
||||||
|
if len(selected) == 0 {
|
||||||
|
logger.Warn().Msg("[dht] no indexers found in DHT for replenishment")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
added := 0
|
||||||
|
for _, ai := range selected {
|
||||||
|
addr := addrKey(ai)
|
||||||
|
if !Indexers.ExistsAddr(addr) {
|
||||||
|
adCopy := ai
|
||||||
|
Indexers.SetAddr(addr, &adCopy)
|
||||||
|
added++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if added > 0 {
|
||||||
|
logger.Info().Int("added", added).Msg("[dht] indexers added from DHT")
|
||||||
|
Indexers.NudgeIt()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// addrKey returns the canonical map key for an AddrInfo.
// The PeerID is used as key so the same peer is never stored twice regardless
// of which of its addresses was seen first. Both the consensus and DHT
// replenishment paths rely on this for pool deduplication.
func addrKey(ai pp.AddrInfo) string {
	return ai.ID.String()
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -3,6 +3,7 @@ package common
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
"net"
|
"net"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -37,3 +38,31 @@ func ExtractIP(addr string) (net.IP, error) {
|
|||||||
}
|
}
|
||||||
return ip, nil
|
return ip, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetIndexer is a thin wrapper over the global pool's Indexers.GetAddr,
// looking up an indexer by its address-or-ID key.
func GetIndexer(addrOrId string) *pp.AddrInfo {
	return Indexers.GetAddr(addrOrId)
}
|
||||||
|
|
||||||
|
// GetIndexersIDs returns the PeerIDs of all indexers currently in the pool.
func GetIndexersIDs() []pp.ID {
	return Indexers.GetAddrIDs()
}
|
||||||
|
|
||||||
|
// GetIndexersStr returns the string form of all indexer addresses in the pool.
func GetIndexersStr() []string {
	return Indexers.GetAddrsStr()
}
|
||||||
|
|
||||||
|
func GetIndexers() []*pp.AddrInfo {
|
||||||
|
entries := Indexers.GetAddrs()
|
||||||
|
result := make([]*pp.AddrInfo, 0, len(entries))
|
||||||
|
for _, e := range entries {
|
||||||
|
result = append(result, e.Info)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shuffle randomly permutes slice IN PLACE using rand.Shuffle and returns the
// same slice for call-chaining convenience. Callers must not rely on the
// input keeping its original order.
func Shuffle[T any](slice []T) []T {
	swap := func(a, b int) {
		slice[a], slice[b] = slice[b], slice[a]
	}
	rand.Shuffle(len(slice), swap)
	return slice
}
|
||||||
|
|||||||
137
daemons/node/connection_gater.go
Normal file
137
daemons/node/connection_gater.go
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
package node
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"oc-discovery/daemons/node/common"
|
||||||
|
"oc-discovery/daemons/node/indexer"
|
||||||
|
|
||||||
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
|
"cloud.o-forge.io/core/oc-lib/dbs"
|
||||||
|
"cloud.o-forge.io/core/oc-lib/models/peer"
|
||||||
|
"github.com/libp2p/go-libp2p/core/control"
|
||||||
|
"github.com/libp2p/go-libp2p/core/host"
|
||||||
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
ma "github.com/multiformats/go-multiaddr"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OCConnectionGater enforces two rules on every inbound connection:
//  1. If the peer is known locally and blacklisted → reject.
//  2. If the peer is unknown locally → ask indexers one by one whether it
//     exists in the DHT. Accept as soon as one confirms it; reject if none do
//     (or if no indexers are reachable yet, allow optimistically).
//
// Outbound connections are always allowed — we chose to dial them.
// The decision point is InterceptSecured; the other interceptor methods are
// pass-throughs.
type OCConnectionGater struct {
	// host is used to dial indexers for DHT-existence queries.
	host host.Host
}
|
||||||
|
|
||||||
|
// newOCConnectionGater builds a gater bound to the given host.
func newOCConnectionGater(h host.Host) *OCConnectionGater {
	return &OCConnectionGater{host: h}
}
|
||||||
|
|
||||||
|
// InterceptPeerDial — allow all outbound dials.
func (g *OCConnectionGater) InterceptPeerDial(_ pp.ID) bool { return true }

// InterceptAddrDial — allow all outbound dials.
func (g *OCConnectionGater) InterceptAddrDial(_ pp.ID, _ ma.Multiaddr) bool { return true }

// InterceptAccept — allow at transport level (PeerID not yet known).
func (g *OCConnectionGater) InterceptAccept(_ network.ConnMultiaddrs) bool { return true }

// InterceptUpgraded — final gate; always allow (decisions already made in InterceptSecured).
func (g *OCConnectionGater) InterceptUpgraded(_ network.Conn) (bool, control.DisconnectReason) {
	return true, 0
}
|
||||||
|
|
||||||
|
// InterceptSecured is called after the cryptographic handshake — PeerID is now known.
// Only inbound connections are verified; outbound are trusted.
//
// Decision order: local DB verdict (blacklist → reject, known → accept),
// then one authoritative DHT lookup through the first reachable indexer,
// and finally an optimistic accept when no indexer can be consulted.
func (g *OCConnectionGater) InterceptSecured(dir network.Direction, pid pp.ID, _ network.ConnMultiaddrs) bool {
	if dir == network.DirOutbound {
		return true
	}
	logger := oclib.GetLogger()

	// 1. Local DB lookup by PeerID.
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
	results := access.Search(&dbs.Filters{
		And: map[string][]dbs.Filter{ // search by name if no filters are provided
			"peer_id": {{Operator: dbs.EQUAL.String(), Value: pid.String()}},
		},
	}, pid.String(), false)
	for _, item := range results.Data {
		p, ok := item.(*peer.Peer)
		// Defensive: skip rows that aren't Peers or don't match exactly.
		if !ok || p.PeerID != pid.String() {
			continue
		}
		if p.Relation == peer.BLACKLIST {
			logger.Warn().Str("peer", pid.String()).Msg("[gater] rejected blacklisted peer")
			return false
		}
		// Known, not blacklisted.
		return true
	}

	// 2. Unknown locally — verify via indexers.
	indexers := common.Indexers.GetAddrs()

	if len(indexers) == 0 {
		// No indexers reachable yet — allow optimistically (bootstrap phase).
		logger.Warn().Str("peer", pid.String()).Msg("[gater] no indexers available, allowing unverified inbound")
		return true
	}

	req := indexer.GetValue{PeerID: pid.String()}
	// A single DHT GetValue already traverses the entire DHT network, so asking
	// a second indexer would yield the same result. We only fall through to the
	// next indexer if the current one is unreachable (transport error), not if
	// it returns found=false (that answer is already DHT-wide authoritative).
	for _, ai := range indexers {
		found, reachable := queryIndexerPeerExists(g.host, *ai.Info, req)
		if !reachable {
			continue // indexer down — try next
		}
		if !found {
			logger.Warn().Str("peer", pid.String()).Msg("[gater] peer not found in DHT, rejecting inbound")
		}
		return found // definitive DHT answer
	}

	// All indexers unreachable — allow optimistically rather than blocking indefinitely.
	logger.Warn().Str("peer", pid.String()).Msg("[gater] all indexers unreachable, allowing unverified inbound")
	return true
}
|
||||||
|
|
||||||
|
// queryIndexerPeerExists opens a fresh one-shot stream to ai, sends a GetValue
// request, and returns (found, reachable).
// reachable=false means the indexer could not be reached (transport error);
// the caller should then try another indexer.
// reachable=true means the indexer answered — found is the DHT-wide authoritative result.
//
// The whole exchange is bounded by a 3s context plus a 3s stream deadline.
func queryIndexerPeerExists(h host.Host, ai pp.AddrInfo, req indexer.GetValue) (found, reachable bool) {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	// Establish the connection first if needed.
	if h.Network().Connectedness(ai.ID) != network.Connected {
		if err := h.Connect(ctx, ai); err != nil {
			return false, false
		}
	}
	s, err := h.NewStream(ctx, ai.ID, common.ProtocolGet)
	if err != nil {
		return false, false
	}
	defer s.Close()
	s.SetDeadline(time.Now().Add(3 * time.Second))

	if err := json.NewEncoder(s).Encode(req); err != nil {
		return false, false
	}
	var resp indexer.GetResponse
	// A decode failure counts as "unreachable": no authoritative answer.
	if err := json.NewDecoder(s).Decode(&resp); err != nil {
		return false, false
	}
	return resp.Found, true
}
|
||||||
254
daemons/node/indexer/behavior.go
Normal file
254
daemons/node/indexer/behavior.go
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
package indexer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"oc-discovery/conf"
|
||||||
|
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ── defaults ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultMaxConnPerWindow = 20
|
||||||
|
defaultConnWindowSecs = 30
|
||||||
|
defaultMaxHBPerMinute = 5
|
||||||
|
defaultMaxPublishPerMin = 10
|
||||||
|
defaultMaxGetPerMin = 50
|
||||||
|
strikeThreshold = 3
|
||||||
|
banDuration = 10 * time.Minute
|
||||||
|
behaviorWindowDur = 60 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
func cfgOr(v, def int) int {
|
||||||
|
if v > 0 {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
return def
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── ConnectionRateGuard ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// ConnectionRateGuard limits the number of NEW incoming connections accepted
|
||||||
|
// within a sliding time window. It protects public indexers against coordinated
|
||||||
|
// registration floods (Sybil bursts).
|
||||||
|
type ConnectionRateGuard struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
window []time.Time
|
||||||
|
maxInWindow int
|
||||||
|
windowDur time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
func newConnectionRateGuard() *ConnectionRateGuard {
|
||||||
|
cfg := conf.GetConfig()
|
||||||
|
return &ConnectionRateGuard{
|
||||||
|
maxInWindow: cfgOr(cfg.MaxConnPerWindow, defaultMaxConnPerWindow),
|
||||||
|
windowDur: time.Duration(cfgOr(cfg.ConnWindowSecs, defaultConnWindowSecs)) * time.Second,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allow returns true if a new connection may be accepted.
|
||||||
|
// The internal window is pruned on each call so memory stays bounded.
|
||||||
|
func (g *ConnectionRateGuard) Allow() bool {
|
||||||
|
g.mu.Lock()
|
||||||
|
defer g.mu.Unlock()
|
||||||
|
now := time.Now()
|
||||||
|
cutoff := now.Add(-g.windowDur)
|
||||||
|
i := 0
|
||||||
|
for i < len(g.window) && g.window[i].Before(cutoff) {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
g.window = g.window[i:]
|
||||||
|
if len(g.window) >= g.maxInWindow {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
g.window = append(g.window, now)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── per-node state ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
type nodeBehavior struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
knownDID string
|
||||||
|
hbTimes []time.Time
|
||||||
|
pubTimes []time.Time
|
||||||
|
getTimes []time.Time
|
||||||
|
strikes int
|
||||||
|
bannedUntil time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nb *nodeBehavior) isBanned() bool {
|
||||||
|
return time.Now().Before(nb.bannedUntil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nb *nodeBehavior) strike(n int) {
|
||||||
|
nb.strikes += n
|
||||||
|
if nb.strikes >= strikeThreshold {
|
||||||
|
nb.bannedUntil = time.Now().Add(banDuration)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func pruneWindow(ts []time.Time, dur time.Duration) []time.Time {
|
||||||
|
cutoff := time.Now().Add(-dur)
|
||||||
|
i := 0
|
||||||
|
for i < len(ts) && ts[i].Before(cutoff) {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
return ts[i:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// recordInWindow appends now to the window slice and returns false (+ adds a
|
||||||
|
// strike) when the count exceeds max.
|
||||||
|
func (nb *nodeBehavior) recordInWindow(ts *[]time.Time, max int) bool {
|
||||||
|
*ts = pruneWindow(*ts, behaviorWindowDur)
|
||||||
|
if len(*ts) >= max {
|
||||||
|
nb.strike(1)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
*ts = append(*ts, time.Now())
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── NodeBehaviorTracker ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// NodeBehaviorTracker is the indexer-side per-node compliance monitor.
|
||||||
|
// It is entirely local: no state is shared with other indexers.
|
||||||
|
type NodeBehaviorTracker struct {
|
||||||
|
mu sync.RWMutex
|
||||||
|
nodes map[pp.ID]*nodeBehavior
|
||||||
|
|
||||||
|
maxHB int
|
||||||
|
maxPub int
|
||||||
|
maxGet int
|
||||||
|
}
|
||||||
|
|
||||||
|
func newNodeBehaviorTracker() *NodeBehaviorTracker {
|
||||||
|
cfg := conf.GetConfig()
|
||||||
|
return &NodeBehaviorTracker{
|
||||||
|
nodes: make(map[pp.ID]*nodeBehavior),
|
||||||
|
maxHB: cfgOr(cfg.MaxHBPerMinute, defaultMaxHBPerMinute),
|
||||||
|
maxPub: cfgOr(cfg.MaxPublishPerMinute, defaultMaxPublishPerMin),
|
||||||
|
maxGet: cfgOr(cfg.MaxGetPerMinute, defaultMaxGetPerMin),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *NodeBehaviorTracker) get(pid pp.ID) *nodeBehavior {
|
||||||
|
t.mu.RLock()
|
||||||
|
nb := t.nodes[pid]
|
||||||
|
t.mu.RUnlock()
|
||||||
|
if nb != nil {
|
||||||
|
return nb
|
||||||
|
}
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
if nb = t.nodes[pid]; nb == nil {
|
||||||
|
nb = &nodeBehavior{}
|
||||||
|
t.nodes[pid] = nb
|
||||||
|
}
|
||||||
|
return nb
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsBanned returns true when the peer is in an active ban period.
|
||||||
|
func (t *NodeBehaviorTracker) IsBanned(pid pp.ID) bool {
|
||||||
|
nb := t.get(pid)
|
||||||
|
nb.mu.Lock()
|
||||||
|
defer nb.mu.Unlock()
|
||||||
|
return nb.isBanned()
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordHeartbeat checks heartbeat cadence. Returns an error if the peer is
|
||||||
|
// flooding (too many heartbeats in the sliding window).
|
||||||
|
func (t *NodeBehaviorTracker) RecordHeartbeat(pid pp.ID) error {
|
||||||
|
nb := t.get(pid)
|
||||||
|
nb.mu.Lock()
|
||||||
|
defer nb.mu.Unlock()
|
||||||
|
if nb.isBanned() {
|
||||||
|
return errors.New("peer is banned")
|
||||||
|
}
|
||||||
|
if !nb.recordInWindow(&nb.hbTimes, t.maxHB) {
|
||||||
|
return errors.New("heartbeat flood detected")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckIdentity verifies that the DID associated with a PeerID never changes.
|
||||||
|
// A DID change is a strong signal of identity spoofing.
|
||||||
|
func (t *NodeBehaviorTracker) CheckIdentity(pid pp.ID, did string) error {
|
||||||
|
if did == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
nb := t.get(pid)
|
||||||
|
nb.mu.Lock()
|
||||||
|
defer nb.mu.Unlock()
|
||||||
|
if nb.knownDID == "" {
|
||||||
|
nb.knownDID = did
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if nb.knownDID != did {
|
||||||
|
nb.strike(2) // identity change is severe
|
||||||
|
return errors.New("DID mismatch for peer " + pid.String())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordBadSignature registers a cryptographic verification failure.
|
||||||
|
// A single bad signature is worth 2 strikes (near-immediate ban).
|
||||||
|
func (t *NodeBehaviorTracker) RecordBadSignature(pid pp.ID) {
|
||||||
|
nb := t.get(pid)
|
||||||
|
nb.mu.Lock()
|
||||||
|
defer nb.mu.Unlock()
|
||||||
|
nb.strike(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordPublish checks publish volume. Returns an error if the peer is
|
||||||
|
// sending too many publish requests.
|
||||||
|
func (t *NodeBehaviorTracker) RecordPublish(pid pp.ID) error {
|
||||||
|
nb := t.get(pid)
|
||||||
|
nb.mu.Lock()
|
||||||
|
defer nb.mu.Unlock()
|
||||||
|
if nb.isBanned() {
|
||||||
|
return errors.New("peer is banned")
|
||||||
|
}
|
||||||
|
if !nb.recordInWindow(&nb.pubTimes, t.maxPub) {
|
||||||
|
return errors.New("publish volume exceeded")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordGet checks get volume. Returns an error if the peer is enumerating
|
||||||
|
// the DHT at an abnormal rate.
|
||||||
|
func (t *NodeBehaviorTracker) RecordGet(pid pp.ID) error {
|
||||||
|
nb := t.get(pid)
|
||||||
|
nb.mu.Lock()
|
||||||
|
defer nb.mu.Unlock()
|
||||||
|
if nb.isBanned() {
|
||||||
|
return errors.New("peer is banned")
|
||||||
|
}
|
||||||
|
if !nb.recordInWindow(&nb.getTimes, t.maxGet) {
|
||||||
|
return errors.New("get volume exceeded")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup removes the behavior entry for a peer if it is not currently banned.
|
||||||
|
// Called when the peer is evicted from StreamRecords by the GC.
|
||||||
|
func (t *NodeBehaviorTracker) Cleanup(pid pp.ID) {
|
||||||
|
t.mu.RLock()
|
||||||
|
nb := t.nodes[pid]
|
||||||
|
t.mu.RUnlock()
|
||||||
|
if nb == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
nb.mu.Lock()
|
||||||
|
banned := nb.isBanned()
|
||||||
|
nb.mu.Unlock()
|
||||||
|
if !banned {
|
||||||
|
t.mu.Lock()
|
||||||
|
delete(t.nodes, pid)
|
||||||
|
t.mu.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,7 +7,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"oc-discovery/conf"
|
"math/rand"
|
||||||
"oc-discovery/daemons/node/common"
|
"oc-discovery/daemons/node/common"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -18,7 +18,7 @@ import (
|
|||||||
"cloud.o-forge.io/core/oc-lib/tools"
|
"cloud.o-forge.io/core/oc-lib/tools"
|
||||||
"github.com/libp2p/go-libp2p/core/crypto"
|
"github.com/libp2p/go-libp2p/core/crypto"
|
||||||
"github.com/libp2p/go-libp2p/core/network"
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
"github.com/libp2p/go-libp2p/core/peer"
|
lpp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PeerRecordPayload struct {
|
type PeerRecordPayload struct {
|
||||||
@@ -118,6 +118,7 @@ func (ix *IndexerService) genPIDKey(peerID string) string {
|
|||||||
func (ix *IndexerService) initNodeHandler() {
|
func (ix *IndexerService) initNodeHandler() {
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
logger.Info().Msg("Init Node Handler")
|
logger.Info().Msg("Init Node Handler")
|
||||||
|
|
||||||
// Each heartbeat from a node carries a freshly signed PeerRecord.
|
// Each heartbeat from a node carries a freshly signed PeerRecord.
|
||||||
// Republish it to the DHT so the record never expires as long as the node
|
// Republish it to the DHT so the record never expires as long as the node
|
||||||
// is alive — no separate publish stream needed from the node side.
|
// is alive — no separate publish stream needed from the node side.
|
||||||
@@ -177,49 +178,48 @@ func (ix *IndexerService) initNodeHandler() {
|
|||||||
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat)
|
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat)
|
||||||
ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish)
|
ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish)
|
||||||
ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet)
|
ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet)
|
||||||
ix.Host.SetStreamHandler(common.ProtocolIndexerGetNatives, ix.handleGetNatives)
|
ix.Host.SetStreamHandler(common.ProtocolIndexerCandidates, ix.handleCandidateRequest)
|
||||||
ix.Host.SetStreamHandler(common.ProtocolIndexerConsensus, ix.handleIndexerConsensus)
|
ix.initSearchHandlers()
|
||||||
}
|
}
|
||||||
|
|
||||||
// handleIndexerConsensus implements Phase 2 liveness voting (ProtocolIndexerConsensus).
|
// handleCandidateRequest responds to a node's consensus candidate request.
|
||||||
// The caller sends a list of candidate multiaddrs; this indexer replies with the
|
// Returns a random sample of indexers from the local DHT cache.
|
||||||
// subset it considers currently alive (recent heartbeat in StreamRecords).
|
func (ix *IndexerService) handleCandidateRequest(s network.Stream) {
|
||||||
func (ix *IndexerService) handleIndexerConsensus(stream network.Stream) {
|
defer s.Close()
|
||||||
defer stream.Reset()
|
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
var req common.IndexerCandidatesRequest
|
||||||
var req common.IndexerConsensusRequest
|
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||||
if err := json.NewDecoder(stream).Decode(&req); err != nil {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if req.Count <= 0 || req.Count > 10 {
|
||||||
|
req.Count = 3
|
||||||
|
}
|
||||||
|
ix.dhtCacheMu.RLock()
|
||||||
|
cache := make([]dhtCacheEntry, len(ix.dhtCache))
|
||||||
|
copy(cache, ix.dhtCache)
|
||||||
|
ix.dhtCacheMu.RUnlock()
|
||||||
|
|
||||||
ix.StreamMU.RLock()
|
// Shuffle for randomness: each voter offers a different subset.
|
||||||
streams := ix.StreamRecords[common.ProtocolHeartbeat]
|
rand.Shuffle(len(cache), func(i, j int) { cache[i], cache[j] = cache[j], cache[i] })
|
||||||
ix.StreamMU.RUnlock()
|
candidates := make([]lpp.AddrInfo, 0, req.Count)
|
||||||
|
for _, e := range cache {
|
||||||
alive := make([]string, 0, len(req.Candidates))
|
if len(candidates) >= req.Count {
|
||||||
for _, addr := range req.Candidates {
|
break
|
||||||
ad, err := peer.AddrInfoFromString(addr)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
ix.StreamMU.RLock()
|
candidates = append(candidates, e.AI)
|
||||||
rec, ok := streams[ad.ID]
|
|
||||||
ix.StreamMU.RUnlock()
|
|
||||||
if !ok || rec.HeartbeatStream == nil || rec.HeartbeatStream.UptimeTracker == nil {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
// D: consider alive only if recent heartbeat AND score above minimum quality bar.
|
json.NewEncoder(s).Encode(common.IndexerCandidatesResponse{Candidates: candidates})
|
||||||
if time.Since(rec.HeartbeatStream.UptimeTracker.LastSeen) <= 2*common.RecommendedHeartbeatInterval &&
|
|
||||||
rec.LastScore >= 30.0 {
|
|
||||||
alive = append(alive, addr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
json.NewEncoder(stream).Encode(common.IndexerConsensusResponse{Alive: alive})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ix *IndexerService) handleNodePublish(s network.Stream) {
|
func (ix *IndexerService) handleNodePublish(s network.Stream) {
|
||||||
defer s.Close()
|
defer s.Close()
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
|
remotePeer := s.Conn().RemotePeer()
|
||||||
|
if err := ix.behavior.RecordPublish(remotePeer); err != nil {
|
||||||
|
logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("publish refused")
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
for {
|
for {
|
||||||
var rec PeerRecord
|
var rec PeerRecord
|
||||||
if err := json.NewDecoder(s).Decode(&rec); err != nil {
|
if err := json.NewDecoder(s).Decode(&rec); err != nil {
|
||||||
@@ -233,14 +233,20 @@ func (ix *IndexerService) handleNodePublish(s network.Stream) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if _, err := rec.Verify(); err != nil {
|
if _, err := rec.Verify(); err != nil {
|
||||||
logger.Err(err)
|
ix.behavior.RecordBadSignature(remotePeer)
|
||||||
|
logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("bad signature on publish")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := ix.behavior.CheckIdentity(remotePeer, rec.DID); err != nil {
|
||||||
|
logger.Warn().Err(err).Msg("identity mismatch on publish")
|
||||||
|
s.Reset()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if rec.PeerID == "" || rec.ExpiryDate.Before(time.Now().UTC()) {
|
if rec.PeerID == "" || rec.ExpiryDate.Before(time.Now().UTC()) {
|
||||||
logger.Err(errors.New(rec.PeerID + " is expired."))
|
logger.Err(errors.New(rec.PeerID + " is expired."))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
pid, err := peer.Decode(rec.PeerID)
|
pid, err := lpp.Decode(rec.PeerID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -248,7 +254,7 @@ func (ix *IndexerService) handleNodePublish(s network.Stream) {
|
|||||||
ix.StreamMU.Lock()
|
ix.StreamMU.Lock()
|
||||||
defer ix.StreamMU.Unlock()
|
defer ix.StreamMU.Unlock()
|
||||||
if ix.StreamRecords[common.ProtocolHeartbeat] == nil {
|
if ix.StreamRecords[common.ProtocolHeartbeat] == nil {
|
||||||
ix.StreamRecords[common.ProtocolHeartbeat] = map[peer.ID]*common.StreamRecord[PeerRecord]{}
|
ix.StreamRecords[common.ProtocolHeartbeat] = map[lpp.ID]*common.StreamRecord[PeerRecord]{}
|
||||||
}
|
}
|
||||||
streams := ix.StreamRecords[common.ProtocolHeartbeat]
|
streams := ix.StreamRecords[common.ProtocolHeartbeat]
|
||||||
if srec, ok := streams[pid]; ok {
|
if srec, ok := streams[pid]; ok {
|
||||||
@@ -297,6 +303,12 @@ func (ix *IndexerService) handleNodePublish(s network.Stream) {
|
|||||||
func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
||||||
defer s.Close()
|
defer s.Close()
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
|
remotePeer := s.Conn().RemotePeer()
|
||||||
|
if err := ix.behavior.RecordGet(remotePeer); err != nil {
|
||||||
|
logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("get refused")
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
for {
|
for {
|
||||||
var req GetValue
|
var req GetValue
|
||||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||||
@@ -367,43 +379,3 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// handleGetNatives returns this indexer's configured native addresses,
|
|
||||||
// excluding any in the request's Exclude list.
|
|
||||||
func (ix *IndexerService) handleGetNatives(s network.Stream) {
|
|
||||||
defer s.Close()
|
|
||||||
logger := oclib.GetLogger()
|
|
||||||
for {
|
|
||||||
var req common.GetIndexerNativesRequest
|
|
||||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
|
||||||
logger.Err(err).Msg("indexer get natives: decode")
|
|
||||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
|
||||||
strings.Contains(err.Error(), "reset") ||
|
|
||||||
strings.Contains(err.Error(), "closed") ||
|
|
||||||
strings.Contains(err.Error(), "too many connections") {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
|
||||||
for _, e := range req.Exclude {
|
|
||||||
excludeSet[e] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
resp := common.GetIndexerNativesResponse{}
|
|
||||||
for _, addr := range strings.Split(conf.GetConfig().NativeIndexerAddresses, ",") {
|
|
||||||
addr = strings.TrimSpace(addr)
|
|
||||||
if addr == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if _, excluded := excludeSet[addr]; !excluded {
|
|
||||||
resp.Natives = append(resp.Natives, addr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
|
||||||
logger.Err(err).Msg("indexer get natives: encode response")
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package indexer
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -43,19 +42,69 @@ type NameIndexEvent struct {
|
|||||||
|
|
||||||
// nameIndexState holds the local in-memory name index and the sender-side
|
// nameIndexState holds the local in-memory name index and the sender-side
|
||||||
// deduplication tracker.
|
// deduplication tracker.
|
||||||
|
//
|
||||||
|
// Search strategy: trigram inverted index.
|
||||||
|
// - byName: lowercased name → peerID → DID (for delete and exact resolution)
|
||||||
|
// - byPeer: peerID → lowercased name (to recompute trigrams on delete)
|
||||||
|
// - trigrams: 3-char substring → set of peerIDs (for O(1) substring lookup)
|
||||||
|
//
|
||||||
|
// For needles shorter than 3 chars the trigram index cannot help; a linear
|
||||||
|
// scan of byName is used as fallback (rare and fast enough at small N).
|
||||||
type nameIndexState struct {
|
type nameIndexState struct {
|
||||||
// index: name → peerID → DID, built from events received from all indexers.
|
byName map[string]map[string]string // name → peerID → DID
|
||||||
index map[string]map[string]string
|
byPeer map[string]string // peerID → name
|
||||||
|
trigrams map[string]map[string]struct{} // trigram → peerID set
|
||||||
indexMu sync.RWMutex
|
indexMu sync.RWMutex
|
||||||
|
|
||||||
// emitted tracks the last emission time for each (action, name, peerID) key
|
// emitted deduplicates GossipSub emissions within nameIndexDedupWindow.
|
||||||
// to suppress duplicates within nameIndexDedupWindow.
|
// Purged periodically to prevent unbounded growth.
|
||||||
emitted map[string]time.Time
|
emitted map[string]time.Time
|
||||||
emittedMu sync.Mutex
|
emittedMu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// trigramsOf returns all overlapping 3-char substrings of s (already lowercased).
|
||||||
|
// If s is shorter than 3 chars the string itself is returned as the sole token.
|
||||||
|
func trigramsOf(s string) []string {
|
||||||
|
if len(s) < 3 {
|
||||||
|
return []string{s}
|
||||||
|
}
|
||||||
|
out := make([]string, 0, len(s)-2)
|
||||||
|
for i := 0; i <= len(s)-3; i++ {
|
||||||
|
out = append(out, s[i:i+3])
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// addTrigrams inserts peerID into every trigram bucket for name.
|
||||||
|
func (s *nameIndexState) addTrigrams(name, peerID string) {
|
||||||
|
for _, tg := range trigramsOf(name) {
|
||||||
|
if s.trigrams[tg] == nil {
|
||||||
|
s.trigrams[tg] = map[string]struct{}{}
|
||||||
|
}
|
||||||
|
s.trigrams[tg][peerID] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// removeTrigrams deletes peerID from every trigram bucket for name,
|
||||||
|
// cleaning up empty buckets to keep memory tight.
|
||||||
|
func (s *nameIndexState) removeTrigrams(name, peerID string) {
|
||||||
|
for _, tg := range trigramsOf(name) {
|
||||||
|
if m := s.trigrams[tg]; m != nil {
|
||||||
|
delete(m, peerID)
|
||||||
|
if len(m) == 0 {
|
||||||
|
delete(s.trigrams, tg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// shouldEmit returns true if the (action, name, peerID) tuple has not been
|
// shouldEmit returns true if the (action, name, peerID) tuple has not been
|
||||||
// emitted within nameIndexDedupWindow, updating the tracker if so.
|
// emitted within nameIndexDedupWindow, updating the tracker if so.
|
||||||
|
//
|
||||||
|
// On DELETE: the ADD entry for the same peer is immediately removed — the peer
|
||||||
|
// is gone, keeping it would cause the map to grow with departed peers forever.
|
||||||
|
// The DELETE entry itself is kept for the dedup window to absorb duplicate
|
||||||
|
// delete events, then cleaned by the purgeEmitted ticker.
|
||||||
func (s *nameIndexState) shouldEmit(action NameIndexAction, name, peerID string) bool {
|
func (s *nameIndexState) shouldEmit(action NameIndexAction, name, peerID string) bool {
|
||||||
key := string(action) + ":" + name + ":" + peerID
|
key := string(action) + ":" + name + ":" + peerID
|
||||||
s.emittedMu.Lock()
|
s.emittedMu.Lock()
|
||||||
@@ -64,9 +113,27 @@ func (s *nameIndexState) shouldEmit(action NameIndexAction, name, peerID string)
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
s.emitted[key] = time.Now()
|
s.emitted[key] = time.Now()
|
||||||
|
if action == NameIndexDelete {
|
||||||
|
// Peer is leaving: drop its ADD entry — no longer needed.
|
||||||
|
delete(s.emitted, string(NameIndexAdd)+":"+name+":"+peerID)
|
||||||
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// purgeEmitted removes stale DELETE entries from the emitted dedup map.
|
||||||
|
// ADD entries are cleaned eagerly on DELETE, so only short-lived DELETE
|
||||||
|
// entries remain here; the ticker just trims those stragglers.
|
||||||
|
func (s *nameIndexState) purgeEmitted() {
|
||||||
|
now := time.Now()
|
||||||
|
s.emittedMu.Lock()
|
||||||
|
defer s.emittedMu.Unlock()
|
||||||
|
for k, t := range s.emitted {
|
||||||
|
if now.Sub(t) >= nameIndexDedupWindow {
|
||||||
|
delete(s.emitted, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// onEvent applies a received NameIndexEvent to the local index.
|
// onEvent applies a received NameIndexEvent to the local index.
|
||||||
// "add" inserts/updates the mapping; "delete" removes it.
|
// "add" inserts/updates the mapping; "delete" removes it.
|
||||||
// Operations are idempotent — duplicate events from multiple indexers are harmless.
|
// Operations are idempotent — duplicate events from multiple indexers are harmless.
|
||||||
@@ -74,19 +141,40 @@ func (s *nameIndexState) onEvent(evt NameIndexEvent) {
|
|||||||
if evt.Name == "" || evt.PeerID == "" {
|
if evt.Name == "" || evt.PeerID == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
nameLow := strings.ToLower(evt.Name)
|
||||||
s.indexMu.Lock()
|
s.indexMu.Lock()
|
||||||
defer s.indexMu.Unlock()
|
defer s.indexMu.Unlock()
|
||||||
switch evt.Action {
|
switch evt.Action {
|
||||||
case NameIndexAdd:
|
case NameIndexAdd:
|
||||||
if s.index[evt.Name] == nil {
|
// If the peer previously had a different name, clean up old trigrams.
|
||||||
s.index[evt.Name] = map[string]string{}
|
if old, ok := s.byPeer[evt.PeerID]; ok && old != nameLow {
|
||||||
|
s.removeTrigrams(old, evt.PeerID)
|
||||||
|
if s.byName[old] != nil {
|
||||||
|
delete(s.byName[old], evt.PeerID)
|
||||||
|
if len(s.byName[old]) == 0 {
|
||||||
|
delete(s.byName, old)
|
||||||
}
|
}
|
||||||
s.index[evt.Name][evt.PeerID] = evt.DID
|
}
|
||||||
|
}
|
||||||
|
if s.byName[nameLow] == nil {
|
||||||
|
s.byName[nameLow] = map[string]string{}
|
||||||
|
}
|
||||||
|
s.byName[nameLow][evt.PeerID] = evt.DID
|
||||||
|
s.byPeer[evt.PeerID] = nameLow
|
||||||
|
s.addTrigrams(nameLow, evt.PeerID)
|
||||||
|
|
||||||
case NameIndexDelete:
|
case NameIndexDelete:
|
||||||
if s.index[evt.Name] != nil {
|
// Use stored name so trigrams match exactly what was indexed.
|
||||||
delete(s.index[evt.Name], evt.PeerID)
|
name := nameLow
|
||||||
if len(s.index[evt.Name]) == 0 {
|
if stored, ok := s.byPeer[evt.PeerID]; ok {
|
||||||
delete(s.index, evt.Name)
|
name = stored
|
||||||
|
}
|
||||||
|
s.removeTrigrams(name, evt.PeerID)
|
||||||
|
delete(s.byPeer, evt.PeerID)
|
||||||
|
if s.byName[name] != nil {
|
||||||
|
delete(s.byName[name], evt.PeerID)
|
||||||
|
if len(s.byName[name]) == 0 {
|
||||||
|
delete(s.byName, name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -96,10 +184,22 @@ func (s *nameIndexState) onEvent(evt NameIndexEvent) {
|
|||||||
// Must be called after ix.PS is ready.
|
// Must be called after ix.PS is ready.
|
||||||
func (ix *IndexerService) initNameIndex(ps *pubsub.PubSub) {
|
func (ix *IndexerService) initNameIndex(ps *pubsub.PubSub) {
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
ix.nameIndex = &nameIndexState{
|
state := &nameIndexState{
|
||||||
index: map[string]map[string]string{},
|
byName: map[string]map[string]string{},
|
||||||
|
byPeer: map[string]string{},
|
||||||
|
trigrams: map[string]map[string]struct{}{},
|
||||||
emitted: map[string]time.Time{},
|
emitted: map[string]time.Time{},
|
||||||
}
|
}
|
||||||
|
ix.nameIndex = state
|
||||||
|
|
||||||
|
// Periodically purge the emitted dedup map so it doesn't grow forever.
|
||||||
|
go func() {
|
||||||
|
t := time.NewTicker(nameIndexDedupWindow)
|
||||||
|
defer t.Stop()
|
||||||
|
for range t.C {
|
||||||
|
state.purgeEmitted()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
ps.RegisterTopicValidator(TopicNameIndex, func(_ context.Context, _ pp.ID, _ *pubsub.Message) bool {
|
ps.RegisterTopicValidator(TopicNameIndex, func(_ context.Context, _ pp.ID, _ *pubsub.Message) bool {
|
||||||
return true
|
return true
|
||||||
@@ -149,23 +249,72 @@ func (ix *IndexerService) publishNameEvent(action NameIndexAction, name, peerID,
|
|||||||
|
|
||||||
// LookupNameIndex searches the distributed name index for peers whose name
|
// LookupNameIndex searches the distributed name index for peers whose name
|
||||||
// contains needle (case-insensitive). Returns peerID → DID for matched peers.
|
// contains needle (case-insensitive). Returns peerID → DID for matched peers.
|
||||||
// Returns nil if the name index is not initialised (e.g. native indexers).
|
// Returns nil if the name index is not initialised.
|
||||||
|
//
|
||||||
|
// Algorithm:
|
||||||
|
// - needle ≥ 3 chars: trigram intersection → O(|candidates|) verify pass.
|
||||||
|
// The trigram index immediately narrows the candidate set; false positives
|
||||||
|
// are eliminated by the full-string contains check.
|
||||||
|
// - needle < 3 chars: linear scan of byName (rare, still fast at small N).
|
||||||
func (ix *IndexerService) LookupNameIndex(needle string) map[string]string {
|
func (ix *IndexerService) LookupNameIndex(needle string) map[string]string {
|
||||||
if ix.nameIndex == nil {
|
if ix.nameIndex == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
result := map[string]string{}
|
|
||||||
needleLow := strings.ToLower(needle)
|
needleLow := strings.ToLower(needle)
|
||||||
|
result := map[string]string{}
|
||||||
|
|
||||||
ix.nameIndex.indexMu.RLock()
|
ix.nameIndex.indexMu.RLock()
|
||||||
defer ix.nameIndex.indexMu.RUnlock()
|
defer ix.nameIndex.indexMu.RUnlock()
|
||||||
for name, peers := range ix.nameIndex.index {
|
|
||||||
fmt.Println(strings.Contains(strings.ToLower(name), needleLow), needleLow, strings.ToLower(name))
|
if len(needleLow) < 3 {
|
||||||
if strings.Contains(strings.ToLower(name), needleLow) {
|
// Short needle: linear scan fallback.
|
||||||
|
for name, peers := range ix.nameIndex.byName {
|
||||||
|
if strings.Contains(name, needleLow) {
|
||||||
for peerID, did := range peers {
|
for peerID, did := range peers {
|
||||||
result[peerID] = did
|
result[peerID] = did
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fmt.Println("RESULT", result)
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trigram intersection: start with the first trigram's set, then
|
||||||
|
// progressively intersect with each subsequent trigram's set.
|
||||||
|
tgs := trigramsOf(needleLow)
|
||||||
|
var candidates map[string]struct{}
|
||||||
|
for _, tg := range tgs {
|
||||||
|
set := ix.nameIndex.trigrams[tg]
|
||||||
|
if len(set) == 0 {
|
||||||
|
return result // any empty trigram set → no possible match
|
||||||
|
}
|
||||||
|
if candidates == nil {
|
||||||
|
candidates = make(map[string]struct{}, len(set))
|
||||||
|
for pid := range set {
|
||||||
|
candidates[pid] = struct{}{}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for pid := range candidates {
|
||||||
|
if _, ok := set[pid]; !ok {
|
||||||
|
delete(candidates, pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(candidates) == 0 {
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Full-string verification pass: trigrams admit false positives
|
||||||
|
// (e.g. "abc" and "bca" share the trigram "bc_" with a rotated name).
|
||||||
|
for peerID := range candidates {
|
||||||
|
name := ix.nameIndex.byPeer[peerID]
|
||||||
|
if strings.Contains(name, needleLow) {
|
||||||
|
did := ""
|
||||||
|
if m := ix.nameIndex.byName[name]; m != nil {
|
||||||
|
did = m[peerID]
|
||||||
|
}
|
||||||
|
result[peerID] = did
|
||||||
|
}
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,783 +0,0 @@
|
|||||||
package indexer
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"math/rand"
|
|
||||||
"slices"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"oc-discovery/daemons/node/common"
|
|
||||||
|
|
||||||
oclib "cloud.o-forge.io/core/oc-lib"
|
|
||||||
pubsub "github.com/libp2p/go-libp2p-pubsub"
|
|
||||||
"github.com/libp2p/go-libp2p/core/crypto"
|
|
||||||
"github.com/libp2p/go-libp2p/core/network"
|
|
||||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Timing and capacity constants governing the native's indexer-tracking behavior.
const (
	// IndexerTTL is the lifetime of a live-indexer cache entry. Set to 50% above
	// the recommended 60s heartbeat interval so a single delayed renewal does not
	// evict a healthy indexer from the native's cache.
	IndexerTTL = 90 * time.Second

	// offloadInterval is how often the native checks if it can release responsible peers.
	offloadInterval = 30 * time.Second

	// dhtRefreshInterval is how often the background goroutine queries the DHT for
	// known-but-expired indexer entries (written by neighbouring natives).
	dhtRefreshInterval = 30 * time.Second

	// maxFallbackPeers caps how many peers the native will accept in self-delegation
	// mode. Beyond this limit the native refuses to act as a fallback indexer so it
	// is not overwhelmed during prolonged indexer outages.
	maxFallbackPeers = 50
)
|
|
||||||
|
|
||||||
// liveIndexerEntry tracks a registered indexer in the native's in-memory cache and DHT.
// PubKey and Signature are forwarded from the IndexerRegistration so the DHT validator
// can verify that the entry was produced by the peer owning the declared PeerID.
// FillRate is the fraction of capacity used (0=empty, 1=full) at last registration.
type liveIndexerEntry struct {
	PeerID       string    `json:"peer_id"`
	Addr         string    `json:"addr"`             // multiaddr the indexer declared itself reachable at
	ExpiresAt    time.Time `json:"expires_at"`       // cache/DHT TTL; entries past this instant are treated as dead
	RegTimestamp int64     `json:"reg_ts,omitempty"` // Timestamp from the original IndexerRegistration
	PubKey       []byte    `json:"pub_key,omitempty"`
	Signature    []byte    `json:"sig,omitempty"`
	FillRate     float64   `json:"fill_rate,omitempty"`
}
|
|
||||||
|
|
||||||
// NativeState holds runtime state specific to native indexer operation.
type NativeState struct {
	liveIndexers     map[string]*liveIndexerEntry // keyed by PeerID, local cache with TTL
	liveIndexersMu   sync.RWMutex                 // guards liveIndexers
	responsiblePeers map[pp.ID]struct{}           // peers for which the native is fallback indexer
	responsibleMu    sync.RWMutex                 // guards responsiblePeers
	// knownPeerIDs accumulates all indexer PeerIDs ever seen (local stream or gossip).
	// Used by refreshIndexersFromDHT to re-hydrate expired entries from the shared DHT,
	// including entries written by other natives.
	knownPeerIDs map[string]string // PeerID → declared multiaddr
	knownMu      sync.RWMutex      // guards knownPeerIDs

	// cancel stops background goroutines (runOffloadLoop, refreshIndexersFromDHT)
	// when the native shuts down.
	cancel context.CancelFunc
}
|
|
||||||
|
|
||||||
func newNativeState(cancel context.CancelFunc) *NativeState {
|
|
||||||
return &NativeState{
|
|
||||||
liveIndexers: map[string]*liveIndexerEntry{},
|
|
||||||
responsiblePeers: map[pp.ID]struct{}{},
|
|
||||||
knownPeerIDs: map[string]string{},
|
|
||||||
cancel: cancel,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// IndexerRecordValidator validates indexer DHT entries under the "indexer" namespace.
// Stateless: Validate rejects malformed, expired, or forged values; Select keeps the
// value with the latest expiry when several replicas conflict.
type IndexerRecordValidator struct{}
|
|
||||||
|
|
||||||
func (v IndexerRecordValidator) Validate(_ string, value []byte) error {
|
|
||||||
var e liveIndexerEntry
|
|
||||||
if err := json.Unmarshal(value, &e); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if e.Addr == "" {
|
|
||||||
return errors.New("missing addr")
|
|
||||||
}
|
|
||||||
if e.ExpiresAt.Before(time.Now().UTC()) {
|
|
||||||
return errors.New("expired indexer record")
|
|
||||||
}
|
|
||||||
// Verify self-signature when present — rejects entries forged by a
|
|
||||||
// compromised native that does not control the declared PeerID.
|
|
||||||
if len(e.Signature) > 0 && len(e.PubKey) > 0 {
|
|
||||||
pub, err := crypto.UnmarshalPublicKey(e.PubKey)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("indexer entry: invalid public key: %w", err)
|
|
||||||
}
|
|
||||||
payload := []byte(fmt.Sprintf("%s|%s|%d", e.PeerID, e.Addr, e.RegTimestamp))
|
|
||||||
if ok, err := pub.Verify(payload, e.Signature); err != nil || !ok {
|
|
||||||
return errors.New("indexer entry: invalid signature")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v IndexerRecordValidator) Select(_ string, values [][]byte) (int, error) {
|
|
||||||
var newest time.Time
|
|
||||||
index := 0
|
|
||||||
for i, val := range values {
|
|
||||||
var e liveIndexerEntry
|
|
||||||
if err := json.Unmarshal(val, &e); err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if e.ExpiresAt.After(newest) {
|
|
||||||
newest = e.ExpiresAt
|
|
||||||
index = i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return index, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// InitNative registers native-specific stream handlers and starts background loops.
// Must be called after DHT is initialized.
// Side effects: installs the native protocol handlers on the host, joins the
// indexer-registry PubSub topic, dials the configured native mesh, and spawns two
// goroutines (offload loop + DHT refresh) that stop when ix.Native.cancel fires.
func (ix *IndexerService) InitNative() {
	ctx, cancel := context.WithCancel(context.Background())
	ix.Native = newNativeState(cancel)
	ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat) // specific heartbeat for Indexer.
	ix.Host.SetStreamHandler(common.ProtocolNativeSubscription, ix.handleNativeSubscription)
	ix.Host.SetStreamHandler(common.ProtocolNativeUnsubscribe, ix.handleNativeUnsubscribe)
	ix.Host.SetStreamHandler(common.ProtocolNativeGetIndexers, ix.handleNativeGetIndexers)
	ix.Host.SetStreamHandler(common.ProtocolNativeConsensus, ix.handleNativeConsensus)
	ix.Host.SetStreamHandler(common.ProtocolNativeGetPeers, ix.handleNativeGetPeers)
	ix.Host.SetStreamHandler(common.ProtocolIndexerGetNatives, ix.handleGetNatives)
	ix.subscribeIndexerRegistry()
	// Ensure long connections to other configured natives (native-to-native mesh).
	common.EnsureNativePeers(ix.Host)
	go ix.runOffloadLoop(ctx)
	go ix.refreshIndexersFromDHT(ctx)
}
|
|
||||||
|
|
||||||
// subscribeIndexerRegistry joins the PubSub topic used by natives to gossip newly
// registered indexer PeerIDs to one another, enabling cross-native DHT discovery.
// A topic validator filters incoming gossip (well-formed, self-signed, sent by a
// known native or this host); accepted registrations only extend knownPeerIDs —
// refreshIndexersFromDHT re-hydrates the actual cache entries later.
func (ix *IndexerService) subscribeIndexerRegistry() {
	logger := oclib.GetLogger()
	ix.PS.RegisterTopicValidator(common.TopicIndexerRegistry, func(_ context.Context, _ pp.ID, msg *pubsub.Message) bool {
		// Parse as a signed IndexerRegistration.
		var reg common.IndexerRegistration
		if err := json.Unmarshal(msg.Data, &reg); err != nil {
			return false
		}
		if reg.Addr == "" {
			return false
		}
		// The declared address must be a parseable multiaddr.
		if _, err := pp.AddrInfoFromString(reg.Addr); err != nil {
			return false
		}
		// Verify the self-signature when present (rejects forged gossip from a
		// compromised native that does not control the announced PeerID).
		if ok, _ := reg.Verify(); !ok {
			return false
		}
		// Accept only messages from known native peers or from this host itself.
		// This prevents external PSK participants from injecting registry entries.
		from := msg.GetFrom()
		if from == ix.Host.ID() {
			return true
		}
		// StaticNatives is keyed by address string; also match on the peer ID in
		// case the sender is known under a different address form.
		common.StreamNativeMu.RLock()
		_, knownNative := common.StaticNatives[from.String()]
		if !knownNative {
			for _, ad := range common.StaticNatives {
				if ad.ID == from {
					knownNative = true
					break
				}
			}
		}
		common.StreamNativeMu.RUnlock()
		return knownNative
	})
	topic, err := ix.PS.Join(common.TopicIndexerRegistry)
	if err != nil {
		logger.Err(err).Msg("native: failed to join indexer registry topic")
		return
	}
	sub, err := topic.Subscribe()
	if err != nil {
		logger.Err(err).Msg("native: failed to subscribe to indexer registry topic")
		return
	}
	// Keep the topic handle so handleNativeSubscription can publish to it.
	ix.PubsubMu.Lock()
	ix.LongLivedPubSubs[common.TopicIndexerRegistry] = topic
	ix.PubsubMu.Unlock()

	// Reader goroutine: runs until sub.Next returns an error (subscription closed).
	go func() {
		for {
			msg, err := sub.Next(context.Background())
			if err != nil {
				return
			}
			// The gossip payload is a JSON-encoded IndexerRegistration (signed).
			var gossipReg common.IndexerRegistration
			if jsonErr := json.Unmarshal(msg.Data, &gossipReg); jsonErr != nil {
				continue
			}
			if gossipReg.Addr == "" || gossipReg.PeerID == "" {
				continue
			}
			// A neighbouring native registered this PeerID; add to known set for DHT refresh.
			ix.Native.knownMu.Lock()
			ix.Native.knownPeerIDs[gossipReg.PeerID] = gossipReg.Addr
			ix.Native.knownMu.Unlock()
		}
	}()
}
|
|
||||||
|
|
||||||
// handleNativeSubscription stores an indexer's alive registration in the local cache
// immediately, then persists it to the DHT asynchronously.
// The stream is temporary: indexer sends one IndexerRegistration and closes.
//
// Pipeline for each registration:
//  1. decode + sanity-check (addr present, PeerID derivable, signature valid),
//  2. admit tentatively into the local cache with a fresh TTL,
//  3. async: dial the declared address, evicting the entry if unreachable,
//  4. gossip the signed registration to neighbouring natives,
//  5. async: persist the entry in the DHT with bounded retry.
func (ix *IndexerService) handleNativeSubscription(s network.Stream) {
	defer s.Close()
	logger := oclib.GetLogger()

	logger.Info().Msg("Subscription")
	for {
		var reg common.IndexerRegistration
		if err := json.NewDecoder(s).Decode(&reg); err != nil {
			logger.Err(err).Msg("native subscription: decode")
			// Terminal transport errors end the handler; any other decode error
			// is treated as a malformed frame and the loop waits for the next one.
			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
				strings.Contains(err.Error(), "reset") ||
				strings.Contains(err.Error(), "closed") ||
				strings.Contains(err.Error(), "too many connections") {
				return
			}
			continue
		}
		logger.Info().Msg("Subscription " + reg.Addr)

		if reg.Addr == "" {
			logger.Error().Msg("native subscription: missing addr")
			return
		}
		// Derive the PeerID from the multiaddr when the sender omitted it.
		if reg.PeerID == "" {
			ad, err := pp.AddrInfoFromString(reg.Addr)
			if err != nil {
				logger.Err(err).Msg("native subscription: invalid addr")
				return
			}
			reg.PeerID = ad.ID.String()
		}

		// Reject registrations with an invalid self-signature.
		if ok, err := reg.Verify(); !ok {
			logger.Warn().Str("peer", reg.PeerID).Err(err).Msg("native subscription: invalid signature, rejecting")
			return
		}

		// Build entry with a fresh TTL — must happen before the cache write so the
		// TTL window is not consumed by DHT retries.
		entry := &liveIndexerEntry{
			PeerID:       reg.PeerID,
			Addr:         reg.Addr,
			ExpiresAt:    time.Now().UTC().Add(IndexerTTL),
			RegTimestamp: reg.Timestamp,
			PubKey:       reg.PubKey,
			Signature:    reg.Signature,
			FillRate:     reg.FillRate,
		}

		// Verify that the declared address is actually reachable before admitting
		// the registration. This async dial runs in the background; the indexer is
		// tentatively admitted immediately (so heartbeats don't get stuck) but is
		// evicted from the cache if the dial fails within 5 s.
		go func(e *liveIndexerEntry) {
			ad, err := pp.AddrInfoFromString(e.Addr)
			if err != nil {
				logger.Warn().Str("addr", e.Addr).Msg("native subscription: invalid addr during validation, rejecting")
				// Only evict if our exact entry is still the cached one — a newer
				// registration may have replaced it in the meantime.
				ix.Native.liveIndexersMu.Lock()
				if cur := ix.Native.liveIndexers[e.PeerID]; cur == e {
					delete(ix.Native.liveIndexers, e.PeerID)
				}
				ix.Native.liveIndexersMu.Unlock()
				return
			}
			dialCtx, dialCancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer dialCancel()
			if err := ix.Host.Connect(dialCtx, *ad); err != nil {
				logger.Warn().Str("addr", e.Addr).Err(err).Msg("native subscription: declared address unreachable, rejecting")
				// Pointer-identity check: evict only our own admitted entry.
				ix.Native.liveIndexersMu.Lock()
				if cur := ix.Native.liveIndexers[e.PeerID]; cur == e {
					delete(ix.Native.liveIndexers, e.PeerID)
				}
				ix.Native.liveIndexersMu.Unlock()
			}
		}(entry)

		// Update local cache and known set immediately so concurrent GetIndexers calls
		// can already see this indexer without waiting for the DHT write to complete.
		ix.Native.liveIndexersMu.Lock()
		_, isRenewal := ix.Native.liveIndexers[reg.PeerID]
		ix.Native.liveIndexers[reg.PeerID] = entry
		ix.Native.liveIndexersMu.Unlock()

		ix.Native.knownMu.Lock()
		ix.Native.knownPeerIDs[reg.PeerID] = reg.Addr
		ix.Native.knownMu.Unlock()

		// Gossip the signed registration to neighbouring natives.
		// The payload is JSON-encoded so the receiver can verify the self-signature.
		ix.PubsubMu.RLock()
		topic := ix.LongLivedPubSubs[common.TopicIndexerRegistry]
		ix.PubsubMu.RUnlock()
		if topic != nil {
			if gossipData, marshalErr := json.Marshal(reg); marshalErr == nil {
				if err := topic.Publish(context.Background(), gossipData); err != nil {
					logger.Err(err).Msg("native subscription: registry gossip publish")
				}
			}
		}

		if isRenewal {
			// logger.Debug().Str("peer", reg.PeerID).Msg("native: indexer TTL renewed : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
		} else {
			logger.Info().Str("peer", reg.PeerID).Msg("native: indexer registered : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
		}

		// Persist in DHT asynchronously with bounded retry.
		// Max retry window = IndexerTTL (90 s) — retrying past entry expiry is pointless.
		// Backoff: 10 s → 20 s → 40 s, then repeats at 40 s until deadline.
		key := ix.genIndexerKey(reg.PeerID)
		data, err := json.Marshal(entry)
		if err != nil {
			logger.Err(err).Msg("native subscription: marshal entry")
			return
		}
		go func() {
			deadline := time.Now().Add(IndexerTTL)
			backoff := 10 * time.Second
			for {
				if time.Now().After(deadline) {
					logger.Warn().Str("key", key).Msg("native subscription: DHT put abandoned, entry TTL exceeded")
					return
				}
				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
				err := ix.DHT.PutValue(ctx, key, data)
				cancel()
				if err == nil {
					return
				}
				logger.Err(err).Msg("native subscription: DHT put " + key)
				// Only "empty routing table" is retried; other errors are final.
				if !strings.Contains(err.Error(), "failed to find any peer in table") {
					return // non-retryable error
				}
				remaining := time.Until(deadline)
				if backoff > remaining {
					backoff = remaining
				}
				if backoff <= 0 {
					return
				}
				time.Sleep(backoff)
				if backoff < 40*time.Second {
					backoff *= 2
				}
			}
		}()
		break
	}
}
|
|
||||||
|
|
||||||
// handleNativeUnsubscribe removes a departing indexer from the local cache and
|
|
||||||
// known set immediately, without waiting for TTL expiry.
|
|
||||||
func (ix *IndexerService) handleNativeUnsubscribe(s network.Stream) {
|
|
||||||
defer s.Close()
|
|
||||||
logger := oclib.GetLogger()
|
|
||||||
var reg common.IndexerRegistration
|
|
||||||
if err := json.NewDecoder(s).Decode(®); err != nil {
|
|
||||||
logger.Err(err).Msg("native unsubscribe: decode")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if reg.PeerID == "" {
|
|
||||||
logger.Warn().Msg("native unsubscribe: missing peer_id")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
ix.Native.liveIndexersMu.Lock()
|
|
||||||
delete(ix.Native.liveIndexers, reg.PeerID)
|
|
||||||
ix.Native.liveIndexersMu.Unlock()
|
|
||||||
ix.Native.knownMu.Lock()
|
|
||||||
delete(ix.Native.knownPeerIDs, reg.PeerID)
|
|
||||||
ix.Native.knownMu.Unlock()
|
|
||||||
logger.Info().Str("peer", reg.PeerID).Msg("native: indexer explicitly unregistered")
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleNativeGetIndexers returns this native's own list of reachable indexers.
// Self-delegation (native acting as temporary fallback indexer) is only permitted
// for nodes — never for peers that are themselves registered indexers in knownPeerIDs.
// The consensus across natives is the responsibility of the requesting node/indexer.
func (ix *IndexerService) handleNativeGetIndexers(s network.Stream) {
	defer s.Close()
	logger := oclib.GetLogger()
	for {
		var req common.GetIndexersRequest
		if err := json.NewDecoder(s).Decode(&req); err != nil {
			logger.Err(err).Msg("native get indexers: decode")
			// Terminal transport errors end the handler; otherwise wait for a new frame.
			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
				strings.Contains(err.Error(), "reset") ||
				strings.Contains(err.Error(), "closed") ||
				strings.Contains(err.Error(), "too many connections") {
				return
			}
			continue
		}
		// Default: hand out up to 3 indexers when the caller didn't ask for a count.
		if req.Count <= 0 {
			req.Count = 3
		}
		// Exclude the caller itself so it is never handed its own address.
		callerPeerID := s.Conn().RemotePeer().String()
		reachable := ix.reachableLiveIndexers(req.Count, callerPeerID)
		var resp common.GetIndexersResponse

		if len(reachable) == 0 {
			// No live indexers reachable — try to self-delegate.
			if ix.selfDelegate(s.Conn().RemotePeer(), &resp) {
				logger.Info().Str("peer", callerPeerID).Msg("native: no indexers, acting as fallback for node")
			} else {
				// Fallback pool saturated: return empty so the caller retries another
				// native instead of piling more load onto this one.
				logger.Warn().Str("peer", callerPeerID).Int("pool", maxFallbackPeers).Msg(
					"native: fallback pool saturated, refusing self-delegation")
			}
		} else {
			// Look up each reachable address's fill rate from the live cache so the
			// list can be ordered by routing weight below.
			ix.Native.liveIndexersMu.RLock()
			fillRates := make(map[string]float64, len(reachable))
			for _, addr := range reachable {
				ad, err := pp.AddrInfoFromString(addr)
				if err != nil {
					continue
				}
				for _, e := range ix.Native.liveIndexers {
					if e.PeerID == ad.ID.String() {
						fillRates[addr] = e.FillRate
						break
					}
				}
			}
			ix.Native.liveIndexersMu.RUnlock()

			// Sort by routing weight descending: weight = fillRate × (1 − fillRate).
			// This prefers indexers in the "trust sweet spot" — proven popular (fillRate > 0)
			// but not saturated (fillRate < 1). Peak at fillRate ≈ 0.5.
			routingWeight := func(addr string) float64 {
				f := fillRates[addr]
				return f * (1 - f)
			}
			// Insertion sort — candidate lists are small.
			for i := 1; i < len(reachable); i++ {
				for j := i; j > 0 && routingWeight(reachable[j]) > routingWeight(reachable[j-1]); j-- {
					reachable[j], reachable[j-1] = reachable[j-1], reachable[j]
				}
			}
			if req.Count > len(reachable) {
				req.Count = len(reachable)
			}
			resp.Indexers = reachable[:req.Count]
			resp.FillRates = fillRates
		}

		if err := json.NewEncoder(s).Encode(resp); err != nil {
			logger.Err(err).Msg("native get indexers: encode response")
		}
		break
	}
}
|
|
||||||
|
|
||||||
// handleNativeConsensus answers a consensus challenge from a node/indexer.
|
|
||||||
// It returns:
|
|
||||||
// - Trusted: which of the candidates it considers alive.
|
|
||||||
// - Suggestions: extras it knows and trusts that were not in the candidate list.
|
|
||||||
func (ix *IndexerService) handleNativeConsensus(s network.Stream) {
|
|
||||||
defer s.Close()
|
|
||||||
logger := oclib.GetLogger()
|
|
||||||
for {
|
|
||||||
var req common.ConsensusRequest
|
|
||||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
|
||||||
logger.Err(err).Msg("native consensus: decode")
|
|
||||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
|
||||||
strings.Contains(err.Error(), "reset") ||
|
|
||||||
strings.Contains(err.Error(), "closed") ||
|
|
||||||
strings.Contains(err.Error(), "too many connections") {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
myList := ix.reachableLiveIndexers(-1, s.Conn().RemotePeer().String())
|
|
||||||
mySet := make(map[string]struct{}, len(myList))
|
|
||||||
for _, addr := range myList {
|
|
||||||
mySet[addr] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
trusted := []string{}
|
|
||||||
candidateSet := make(map[string]struct{}, len(req.Candidates))
|
|
||||||
for _, addr := range req.Candidates {
|
|
||||||
candidateSet[addr] = struct{}{}
|
|
||||||
if _, ok := mySet[addr]; ok {
|
|
||||||
trusted = append(trusted, addr) // candidate we also confirm as reachable
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extras we trust but that the requester didn't include → suggestions.
|
|
||||||
suggestions := []string{}
|
|
||||||
for _, addr := range myList {
|
|
||||||
if _, inCandidates := candidateSet[addr]; !inCandidates {
|
|
||||||
suggestions = append(suggestions, addr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resp := common.ConsensusResponse{Trusted: trusted, Suggestions: suggestions}
|
|
||||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
|
||||||
logger.Err(err).Msg("native consensus: encode response")
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// selfDelegate marks the caller as a responsible peer and exposes this native's own
|
|
||||||
// address as its temporary indexer. Returns false when the fallback pool is saturated
|
|
||||||
// (maxFallbackPeers reached) — the caller must return an empty response so the node
|
|
||||||
// retries later instead of pinning indefinitely to an overloaded native.
|
|
||||||
func (ix *IndexerService) selfDelegate(remotePeer pp.ID, resp *common.GetIndexersResponse) bool {
|
|
||||||
ix.Native.responsibleMu.Lock()
|
|
||||||
defer ix.Native.responsibleMu.Unlock()
|
|
||||||
if len(ix.Native.responsiblePeers) >= maxFallbackPeers {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
ix.Native.responsiblePeers[remotePeer] = struct{}{}
|
|
||||||
resp.IsSelfFallback = true
|
|
||||||
resp.Indexers = []string{ix.Host.Addrs()[len(ix.Host.Addrs())-1].String() + "/p2p/" + ix.Host.ID().String()}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// reachableLiveIndexers returns the multiaddrs of non-expired, pingable indexers
|
|
||||||
// from the local cache (kept fresh by refreshIndexersFromDHT in background).
|
|
||||||
func (ix *IndexerService) reachableLiveIndexers(count int, from ...string) []string {
|
|
||||||
ix.Native.liveIndexersMu.RLock()
|
|
||||||
now := time.Now().UTC()
|
|
||||||
candidates := []*liveIndexerEntry{}
|
|
||||||
for _, e := range ix.Native.liveIndexers {
|
|
||||||
fmt.Println("liveIndexers", slices.Contains(from, e.PeerID), from, e.PeerID)
|
|
||||||
if e.ExpiresAt.After(now) && !slices.Contains(from, e.PeerID) {
|
|
||||||
candidates = append(candidates, e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ix.Native.liveIndexersMu.RUnlock()
|
|
||||||
|
|
||||||
fmt.Println("midway...", candidates, from, ix.Native.knownPeerIDs)
|
|
||||||
|
|
||||||
if (count > 0 && len(candidates) < count) || count < 0 {
|
|
||||||
ix.Native.knownMu.RLock()
|
|
||||||
for k, v := range ix.Native.knownPeerIDs {
|
|
||||||
// Include peers whose liveIndexers entry is absent OR expired.
|
|
||||||
// A non-nil but expired entry means the peer was once known but
|
|
||||||
// has since timed out — PeerIsAlive below will decide if it's back.
|
|
||||||
fmt.Println("knownPeerIDs", slices.Contains(from, k), from, k)
|
|
||||||
if !slices.Contains(from, k) {
|
|
||||||
candidates = append(candidates, &liveIndexerEntry{
|
|
||||||
PeerID: k,
|
|
||||||
Addr: v,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ix.Native.knownMu.RUnlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Println("midway...1", candidates)
|
|
||||||
|
|
||||||
reachable := []string{}
|
|
||||||
for _, e := range candidates {
|
|
||||||
ad, err := pp.AddrInfoFromString(e.Addr)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if common.PeerIsAlive(ix.Host, *ad) {
|
|
||||||
reachable = append(reachable, e.Addr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return reachable
|
|
||||||
}
|
|
||||||
|
|
||||||
// refreshIndexersFromDHT runs in background and queries the shared DHT for every known
// indexer PeerID whose local cache entry is missing or expired. This supplements the
// local cache with entries written by neighbouring natives.
// Runs until ctx is cancelled; wakes every dhtRefreshInterval.
func (ix *IndexerService) refreshIndexersFromDHT(ctx context.Context) {
	t := time.NewTicker(dhtRefreshInterval)
	defer t.Stop()
	logger := oclib.GetLogger()
	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
		}
		// Snapshot the known PeerIDs so the lock is not held across DHT queries.
		ix.Native.knownMu.RLock()
		peerIDs := make([]string, 0, len(ix.Native.knownPeerIDs))
		for pid := range ix.Native.knownPeerIDs {
			peerIDs = append(peerIDs, pid)
		}
		ix.Native.knownMu.RUnlock()

		now := time.Now().UTC()
		for _, pid := range peerIDs {
			ix.Native.liveIndexersMu.RLock()
			existing := ix.Native.liveIndexers[pid]
			ix.Native.liveIndexersMu.RUnlock()
			if existing != nil && existing.ExpiresAt.After(now) {
				continue // still fresh in local cache
			}
			// Bounded DHT query: 5 s per peer so one slow lookup cannot stall the pass.
			key := ix.genIndexerKey(pid)
			dhtCtx, dhtCancel := context.WithTimeout(context.Background(), 5*time.Second)
			ch, err := ix.DHT.SearchValue(dhtCtx, key)
			if err != nil {
				dhtCancel()
				continue
			}
			// Keep only the freshest non-expired record among all DHT answers.
			var best *liveIndexerEntry
			for b := range ch {
				var e liveIndexerEntry
				if err := json.Unmarshal(b, &e); err != nil {
					continue
				}
				if e.ExpiresAt.After(time.Now().UTC()) {
					if best == nil || e.ExpiresAt.After(best.ExpiresAt) {
						best = &e
					}
				}
			}
			dhtCancel()
			if best != nil {
				ix.Native.liveIndexersMu.Lock()
				ix.Native.liveIndexers[best.PeerID] = best
				ix.Native.liveIndexersMu.Unlock()
				logger.Info().Str("peer", best.PeerID).Msg("native: refreshed indexer from DHT")
			} else {
				// DHT has no fresh entry — peer is gone, prune from known set.
				ix.Native.knownMu.Lock()
				delete(ix.Native.knownPeerIDs, pid)
				ix.Native.knownMu.Unlock()
				logger.Info().Str("peer", pid).Msg("native: pruned stale peer from knownPeerIDs")
			}
		}
	}
}
|
|
||||||
|
|
||||||
func (ix *IndexerService) genIndexerKey(peerID string) string {
|
|
||||||
return "/indexer/" + peerID
|
|
||||||
}
|
|
||||||
|
|
||||||
// runOffloadLoop periodically checks if real indexers are available and releases
|
|
||||||
// responsible peers so they can reconnect to actual indexers on their next attempt.
|
|
||||||
func (ix *IndexerService) runOffloadLoop(ctx context.Context) {
|
|
||||||
t := time.NewTicker(offloadInterval)
|
|
||||||
defer t.Stop()
|
|
||||||
logger := oclib.GetLogger()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case <-t.C:
|
|
||||||
}
|
|
||||||
fmt.Println("runOffloadLoop", ix.Native.responsiblePeers)
|
|
||||||
ix.Native.responsibleMu.RLock()
|
|
||||||
count := len(ix.Native.responsiblePeers)
|
|
||||||
ix.Native.responsibleMu.RUnlock()
|
|
||||||
if count == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
ix.Native.responsibleMu.RLock()
|
|
||||||
peerIDS := []string{}
|
|
||||||
for p := range ix.Native.responsiblePeers {
|
|
||||||
peerIDS = append(peerIDS, p.String())
|
|
||||||
}
|
|
||||||
fmt.Println("COUNT --> ", count, len(ix.reachableLiveIndexers(-1, peerIDS...)))
|
|
||||||
ix.Native.responsibleMu.RUnlock()
|
|
||||||
if len(ix.reachableLiveIndexers(-1, peerIDS...)) > 0 {
|
|
||||||
ix.Native.responsibleMu.RLock()
|
|
||||||
released := ix.Native.responsiblePeers
|
|
||||||
ix.Native.responsibleMu.RUnlock()
|
|
||||||
|
|
||||||
// Reset (not Close) heartbeat streams of released peers.
|
|
||||||
// Close() only half-closes the native's write direction — the peer's write
|
|
||||||
// direction stays open and sendHeartbeat never sees an error.
|
|
||||||
// Reset() abruptly terminates both directions, making the peer's next
|
|
||||||
// json.Encode return an error which triggers replenishIndexersFromNative.
|
|
||||||
ix.StreamMU.Lock()
|
|
||||||
if streams := ix.StreamRecords[common.ProtocolHeartbeat]; streams != nil {
|
|
||||||
for pid := range released {
|
|
||||||
if rec, ok := streams[pid]; ok {
|
|
||||||
if rec.HeartbeatStream != nil && rec.HeartbeatStream.Stream != nil {
|
|
||||||
rec.HeartbeatStream.Stream.Reset()
|
|
||||||
}
|
|
||||||
ix.Native.responsibleMu.Lock()
|
|
||||||
delete(ix.Native.responsiblePeers, pid)
|
|
||||||
ix.Native.responsibleMu.Unlock()
|
|
||||||
|
|
||||||
delete(streams, pid)
|
|
||||||
logger.Info().Str("peer", pid.String()).Str("proto", string(common.ProtocolHeartbeat)).Msg(
|
|
||||||
"native: offload — stream reset, peer will reconnect to real indexer")
|
|
||||||
} else {
|
|
||||||
// No recorded heartbeat stream for this peer: either it never
|
|
||||||
// passed the score check (new peer, uptime=0 → score<75) or the
|
|
||||||
// stream was GC'd. We cannot send a Reset signal, so close the
|
|
||||||
// whole connection instead — this makes the peer's sendHeartbeat
|
|
||||||
// return an error, which triggers replenishIndexersFromNative and
|
|
||||||
// migrates it to a real indexer.
|
|
||||||
ix.Native.responsibleMu.Lock()
|
|
||||||
delete(ix.Native.responsiblePeers, pid)
|
|
||||||
ix.Native.responsibleMu.Unlock()
|
|
||||||
go ix.Host.Network().ClosePeer(pid)
|
|
||||||
logger.Info().Str("peer", pid.String()).Msg(
|
|
||||||
"native: offload — no heartbeat stream, closing connection so peer re-requests real indexers")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
ix.StreamMU.Unlock()
|
|
||||||
|
|
||||||
logger.Info().Int("released", count).Msg("native: offloaded responsible peers to real indexers")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleNativeGetPeers returns a random selection of this native's known native
|
|
||||||
// contacts, excluding any in the request's Exclude list.
|
|
||||||
func (ix *IndexerService) handleNativeGetPeers(s network.Stream) {
|
|
||||||
defer s.Close()
|
|
||||||
logger := oclib.GetLogger()
|
|
||||||
for {
|
|
||||||
var req common.GetNativePeersRequest
|
|
||||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
|
||||||
logger.Err(err).Msg("native get peers: decode")
|
|
||||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
|
||||||
strings.Contains(err.Error(), "reset") ||
|
|
||||||
strings.Contains(err.Error(), "closed") ||
|
|
||||||
strings.Contains(err.Error(), "too many connections") {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if req.Count <= 0 {
|
|
||||||
req.Count = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
|
||||||
for _, e := range req.Exclude {
|
|
||||||
excludeSet[e] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
common.StreamNativeMu.RLock()
|
|
||||||
candidates := make([]string, 0, len(common.StaticNatives))
|
|
||||||
for addr := range common.StaticNatives {
|
|
||||||
if _, excluded := excludeSet[addr]; !excluded {
|
|
||||||
candidates = append(candidates, addr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
common.StreamNativeMu.RUnlock()
|
|
||||||
|
|
||||||
rand.Shuffle(len(candidates), func(i, j int) { candidates[i], candidates[j] = candidates[j], candidates[i] })
|
|
||||||
if req.Count > len(candidates) {
|
|
||||||
req.Count = len(candidates)
|
|
||||||
}
|
|
||||||
|
|
||||||
resp := common.GetNativePeersResponse{Peers: candidates[:req.Count]}
|
|
||||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
|
||||||
logger.Err(err).Msg("native get peers: encode response")
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// StartNativeRegistration starts a goroutine that periodically registers this
|
|
||||||
// indexer with all configured native indexers (every RecommendedHeartbeatInterval).
|
|
||||||
228
daemons/node/indexer/search.go
Normal file
228
daemons/node/indexer/search.go
Normal file
@@ -0,0 +1,228 @@
|
|||||||
|
package indexer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"oc-discovery/conf"
|
||||||
|
"oc-discovery/daemons/node/common"
|
||||||
|
|
||||||
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
|
)
|
||||||
|
|
||||||
|
const TopicSearchPeer = "oc-search-peer"
|
||||||
|
|
||||||
|
// searchTimeout returns the configured search timeout, defaulting to 5s.
|
||||||
|
func searchTimeout() time.Duration {
|
||||||
|
if t := conf.GetConfig().SearchTimeout; t > 0 {
|
||||||
|
return time.Duration(t) * time.Second
|
||||||
|
}
|
||||||
|
return 5 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
// initSearchHandlers registers ProtocolSearchPeer and ProtocolSearchPeerResponse
|
||||||
|
// and subscribes to TopicSearchPeer on GossipSub.
|
||||||
|
func (ix *IndexerService) initSearchHandlers() {
|
||||||
|
ix.Host.SetStreamHandler(common.ProtocolSearchPeer, ix.handleSearchPeer)
|
||||||
|
ix.Host.SetStreamHandler(common.ProtocolSearchPeerResponse, ix.handleSearchPeerResponse)
|
||||||
|
ix.initSearchSubscription()
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateReferent is called from HandleHeartbeat when Referent flag changes.
|
||||||
|
// If referent=true the node is added to referencedNodes; if false it is removed.
|
||||||
|
func (ix *IndexerService) updateReferent(pid pp.ID, rec PeerRecord, referent bool) {
|
||||||
|
ix.referencedNodesMu.Lock()
|
||||||
|
defer ix.referencedNodesMu.Unlock()
|
||||||
|
if referent {
|
||||||
|
ix.referencedNodes[pid] = rec
|
||||||
|
} else {
|
||||||
|
delete(ix.referencedNodes, pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// searchReferenced looks up nodes in referencedNodes matching the query.
|
||||||
|
// Matches on peerID (exact), DID (exact), or name (case-insensitive contains).
|
||||||
|
func (ix *IndexerService) searchReferenced(peerID, did, name string) []common.SearchHit {
|
||||||
|
ix.referencedNodesMu.RLock()
|
||||||
|
defer ix.referencedNodesMu.RUnlock()
|
||||||
|
nameLow := strings.ToLower(name)
|
||||||
|
var hits []common.SearchHit
|
||||||
|
for pid, rec := range ix.referencedNodes {
|
||||||
|
pidStr := pid.String()
|
||||||
|
matchPeerID := peerID != "" && pidStr == peerID
|
||||||
|
matchDID := did != "" && rec.DID == did
|
||||||
|
matchName := name != "" && strings.Contains(strings.ToLower(rec.Name), nameLow)
|
||||||
|
if matchPeerID || matchDID || matchName {
|
||||||
|
hits = append(hits, common.SearchHit{
|
||||||
|
PeerID: pidStr,
|
||||||
|
DID: rec.DID,
|
||||||
|
Name: rec.Name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hits
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleSearchPeer is the ProtocolSearchPeer handler.
|
||||||
|
// The node opens this stream, sends a SearchPeerRequest, and reads results
|
||||||
|
// as they stream in. The stream stays open until timeout or node closes it.
|
||||||
|
func (ix *IndexerService) handleSearchPeer(s network.Stream) {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
defer s.Reset()
|
||||||
|
|
||||||
|
var req common.SearchPeerRequest
|
||||||
|
if err := json.NewDecoder(s).Decode(&req); err != nil || req.QueryID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// streamCtx is cancelled when the node closes its end of the stream.
|
||||||
|
streamCtx, streamCancel := context.WithCancel(context.Background())
|
||||||
|
go func() {
|
||||||
|
// Block until the stream is reset/closed, then cancel our context.
|
||||||
|
buf := make([]byte, 1)
|
||||||
|
s.Read(buf) //nolint:errcheck — we only care about EOF/reset
|
||||||
|
streamCancel()
|
||||||
|
}()
|
||||||
|
defer streamCancel()
|
||||||
|
|
||||||
|
resultCh := make(chan []common.SearchHit, 16)
|
||||||
|
ix.pendingSearchesMu.Lock()
|
||||||
|
ix.pendingSearches[req.QueryID] = resultCh
|
||||||
|
ix.pendingSearchesMu.Unlock()
|
||||||
|
defer func() {
|
||||||
|
ix.pendingSearchesMu.Lock()
|
||||||
|
delete(ix.pendingSearches, req.QueryID)
|
||||||
|
ix.pendingSearchesMu.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Check own referencedNodes immediately.
|
||||||
|
if hits := ix.searchReferenced(req.PeerID, req.DID, req.Name); len(hits) > 0 {
|
||||||
|
resultCh <- hits
|
||||||
|
}
|
||||||
|
|
||||||
|
// Broadcast search on GossipSub so other indexers can respond.
|
||||||
|
ix.publishSearchQuery(req.QueryID, req.PeerID, req.DID, req.Name)
|
||||||
|
|
||||||
|
// Stream results back to node as they arrive; reset idle timer on each result.
|
||||||
|
enc := json.NewEncoder(s)
|
||||||
|
idleTimer := time.NewTimer(searchTimeout())
|
||||||
|
defer idleTimer.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case hits := <-resultCh:
|
||||||
|
if err := enc.Encode(common.SearchPeerResult{QueryID: req.QueryID, Records: hits}); err != nil {
|
||||||
|
logger.Debug().Err(err).Msg("[search] stream write failed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Reset idle timeout: keep alive as long as results trickle in.
|
||||||
|
if !idleTimer.Stop() {
|
||||||
|
select {
|
||||||
|
case <-idleTimer.C:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idleTimer.Reset(searchTimeout())
|
||||||
|
case <-idleTimer.C:
|
||||||
|
// No new result within timeout — close gracefully.
|
||||||
|
return
|
||||||
|
case <-streamCtx.Done():
|
||||||
|
// Node closed the stream (new search superseded this one).
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleSearchPeerResponse is the ProtocolSearchPeerResponse handler.
|
||||||
|
// Another indexer opens this stream to deliver hits for a pending queryID.
|
||||||
|
func (ix *IndexerService) handleSearchPeerResponse(s network.Stream) {
|
||||||
|
defer s.Reset()
|
||||||
|
var result common.SearchPeerResult
|
||||||
|
if err := json.NewDecoder(s).Decode(&result); err != nil || result.QueryID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ix.pendingSearchesMu.Lock()
|
||||||
|
ch := ix.pendingSearches[result.QueryID]
|
||||||
|
ix.pendingSearchesMu.Unlock()
|
||||||
|
if ch != nil {
|
||||||
|
select {
|
||||||
|
case ch <- result.Records:
|
||||||
|
default: // channel full, drop — node may be slow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// publishSearchQuery broadcasts a SearchQuery on TopicSearchPeer.
|
||||||
|
func (ix *IndexerService) publishSearchQuery(queryID, peerID, did, name string) {
|
||||||
|
ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.RLock()
|
||||||
|
topic := ix.LongLivedStreamRecordedService.LongLivedPubSubService.LongLivedPubSubs[TopicSearchPeer]
|
||||||
|
ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.RUnlock()
|
||||||
|
if topic == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
q := common.SearchQuery{
|
||||||
|
QueryID: queryID,
|
||||||
|
PeerID: peerID,
|
||||||
|
DID: did,
|
||||||
|
Name: name,
|
||||||
|
EmitterID: ix.Host.ID().String(),
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(q)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = topic.Publish(context.Background(), b)
|
||||||
|
}
|
||||||
|
|
||||||
|
// initSearchSubscription joins TopicSearchPeer and dispatches incoming queries.
|
||||||
|
func (ix *IndexerService) initSearchSubscription() {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.Lock()
|
||||||
|
topic, err := ix.PS.Join(TopicSearchPeer)
|
||||||
|
if err != nil {
|
||||||
|
ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.Unlock()
|
||||||
|
logger.Err(err).Msg("[search] failed to join search topic")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ix.LongLivedStreamRecordedService.LongLivedPubSubService.LongLivedPubSubs[TopicSearchPeer] = topic
|
||||||
|
ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.Unlock()
|
||||||
|
|
||||||
|
common.SubscribeEvents(
|
||||||
|
ix.LongLivedStreamRecordedService.LongLivedPubSubService,
|
||||||
|
context.Background(),
|
||||||
|
TopicSearchPeer,
|
||||||
|
-1,
|
||||||
|
func(_ context.Context, q common.SearchQuery, _ string) {
|
||||||
|
ix.onSearchQuery(q)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// onSearchQuery handles an incoming GossipSub search broadcast.
|
||||||
|
// If we have matching referencedNodes, we respond to the emitting indexer.
|
||||||
|
func (ix *IndexerService) onSearchQuery(q common.SearchQuery) {
|
||||||
|
// Don't respond to our own broadcasts.
|
||||||
|
if q.EmitterID == ix.Host.ID().String() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
hits := ix.searchReferenced(q.PeerID, q.DID, q.Name)
|
||||||
|
if len(hits) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
emitterID, err := pp.Decode(q.EmitterID)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
s, err := ix.Host.NewStream(ctx, emitterID, common.ProtocolSearchPeerResponse)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer s.Reset()
|
||||||
|
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
json.NewEncoder(s).Encode(common.SearchPeerResult{QueryID: q.QueryID, Records: hits})
|
||||||
|
}
|
||||||
@@ -2,10 +2,14 @@ package indexer
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"math/rand"
|
||||||
"oc-discovery/conf"
|
"oc-discovery/conf"
|
||||||
"oc-discovery/daemons/node/common"
|
"oc-discovery/daemons/node/common"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
oclib "cloud.o-forge.io/core/oc-lib"
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||||||
@@ -15,6 +19,28 @@ import (
|
|||||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// dhtCacheEntry holds one indexer discovered via DHT for use in suggestion responses.
|
||||||
|
type dhtCacheEntry struct {
|
||||||
|
AI pp.AddrInfo
|
||||||
|
LastSeen time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
// offloadState tracks which nodes we've already proposed migration to.
|
||||||
|
// When an indexer is overloaded (fill rate > offloadThreshold) it only sends
|
||||||
|
// SuggestMigrate to a small batch at a time; peers that don't migrate within
|
||||||
|
// offloadGracePeriod are moved to alreadyTried so a new batch can be picked.
|
||||||
|
type offloadState struct {
|
||||||
|
inBatch map[pp.ID]time.Time // peer → time added to current batch
|
||||||
|
alreadyTried map[pp.ID]struct{} // peers proposed to that didn't migrate
|
||||||
|
mu sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
offloadThreshold = 0.80 // fill rate above which to start offloading
|
||||||
|
offloadBatchSize = 5 // max concurrent "please migrate" proposals
|
||||||
|
offloadGracePeriod = 3 * common.RecommendedHeartbeatInterval
|
||||||
|
)
|
||||||
|
|
||||||
// IndexerService manages the indexer node's state: stream records, DHT, pubsub.
|
// IndexerService manages the indexer node's state: stream records, DHT, pubsub.
|
||||||
type IndexerService struct {
|
type IndexerService struct {
|
||||||
*common.LongLivedStreamRecordedService[PeerRecord]
|
*common.LongLivedStreamRecordedService[PeerRecord]
|
||||||
@@ -22,21 +48,41 @@ type IndexerService struct {
|
|||||||
DHT *dht.IpfsDHT
|
DHT *dht.IpfsDHT
|
||||||
isStrictIndexer bool
|
isStrictIndexer bool
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
IsNative bool
|
|
||||||
Native *NativeState // non-nil when IsNative == true
|
|
||||||
nameIndex *nameIndexState
|
nameIndex *nameIndexState
|
||||||
|
dhtProvideCancel context.CancelFunc
|
||||||
|
bornAt time.Time
|
||||||
|
// Passive DHT cache: refreshed every 2 min in background, used for suggestions.
|
||||||
|
dhtCache []dhtCacheEntry
|
||||||
|
dhtCacheMu sync.RWMutex
|
||||||
|
// Offload state for overloaded-indexer migration proposals.
|
||||||
|
offload offloadState
|
||||||
|
// referencedNodes holds nodes that have designated this indexer as their
|
||||||
|
// search referent (Heartbeat.Referent=true). Used for distributed search.
|
||||||
|
referencedNodes map[pp.ID]PeerRecord
|
||||||
|
referencedNodesMu sync.RWMutex
|
||||||
|
// pendingSearches maps queryID → result channel for in-flight searches.
|
||||||
|
pendingSearches map[string]chan []common.SearchHit
|
||||||
|
pendingSearchesMu sync.Mutex
|
||||||
|
// behavior tracks per-node compliance (heartbeat rate, publish/get volume,
|
||||||
|
// identity consistency, signature failures).
|
||||||
|
behavior *NodeBehaviorTracker
|
||||||
|
// connGuard limits new-connection bursts to protect public indexers.
|
||||||
|
connGuard *ConnectionRateGuard
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewIndexerService creates an IndexerService.
|
// NewIndexerService creates an IndexerService.
|
||||||
// If ps is nil, this is a strict indexer (no pre-existing gossip sub from a node).
|
// If ps is nil, this is a strict indexer (no pre-existing gossip sub from a node).
|
||||||
func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative bool) *IndexerService {
|
func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerService {
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
logger.Info().Msg("open indexer mode...")
|
logger.Info().Msg("open indexer mode...")
|
||||||
var err error
|
var err error
|
||||||
ix := &IndexerService{
|
ix := &IndexerService{
|
||||||
LongLivedStreamRecordedService: common.NewStreamRecordedService[PeerRecord](h, maxNode),
|
LongLivedStreamRecordedService: common.NewStreamRecordedService[PeerRecord](h, maxNode),
|
||||||
isStrictIndexer: ps == nil,
|
isStrictIndexer: ps == nil,
|
||||||
IsNative: isNative,
|
referencedNodes: map[pp.ID]PeerRecord{},
|
||||||
|
pendingSearches: map[string]chan []common.SearchHit{},
|
||||||
|
behavior: newNodeBehaviorTracker(),
|
||||||
|
connGuard: newConnectionRateGuard(),
|
||||||
}
|
}
|
||||||
if ps == nil {
|
if ps == nil {
|
||||||
ps, err = pubsub.NewGossipSub(context.Background(), ix.Host)
|
ps, err = pubsub.NewGossipSub(context.Background(), ix.Host)
|
||||||
@@ -46,25 +92,45 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative boo
|
|||||||
}
|
}
|
||||||
ix.PS = ps
|
ix.PS = ps
|
||||||
|
|
||||||
if ix.isStrictIndexer && !isNative {
|
if ix.isStrictIndexer {
|
||||||
logger.Info().Msg("connect to indexers as strict indexer...")
|
logger.Info().Msg("connect to indexers as strict indexer...")
|
||||||
common.ConnectToIndexers(h, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer, ix.Host.ID())
|
common.ConnectToIndexers(h, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer*2)
|
||||||
logger.Info().Msg("subscribe to decentralized search flow as strict indexer...")
|
logger.Info().Msg("subscribe to decentralized search flow as strict indexer...")
|
||||||
go ix.SubscribeToSearch(ix.PS, nil)
|
go ix.SubscribeToSearch(ix.PS, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !isNative {
|
|
||||||
logger.Info().Msg("init distributed name index...")
|
logger.Info().Msg("init distributed name index...")
|
||||||
ix.initNameIndex(ps)
|
ix.initNameIndex(ps)
|
||||||
ix.LongLivedStreamRecordedService.AfterDelete = func(pid pp.ID, name, did string) {
|
ix.LongLivedStreamRecordedService.AfterDelete = func(pid pp.ID, name, did string) {
|
||||||
ix.publishNameEvent(NameIndexDelete, name, pid.String(), did)
|
ix.publishNameEvent(NameIndexDelete, name, pid.String(), did)
|
||||||
}
|
// Remove behavior state for peers that are no longer connected and
|
||||||
|
// have no active ban — keeps memory bounded to the live node set.
|
||||||
|
ix.behavior.Cleanup(pid)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse bootstrap peers from configured native/indexer addresses so that the
|
// AllowInbound: fired once per stream open, before any heartbeat is decoded.
|
||||||
// DHT can find its routing table entries even in a fresh deployment.
|
// 1. Reject peers that are currently banned (behavioral strikes).
|
||||||
|
// 2. For genuinely new connections, apply the burst guard.
|
||||||
|
ix.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
|
||||||
|
if ix.behavior.IsBanned(remotePeer) {
|
||||||
|
return errors.New("peer is banned")
|
||||||
|
}
|
||||||
|
if isNew && !ix.connGuard.Allow() {
|
||||||
|
return errors.New("connection rate limit exceeded, retry later")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateHeartbeat: fired on every heartbeat tick for an established stream.
|
||||||
|
// Checks heartbeat cadence — rejects if the node is sending too fast.
|
||||||
|
ix.ValidateHeartbeat = func(remotePeer pp.ID) error {
|
||||||
|
return ix.behavior.RecordHeartbeat(remotePeer)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse bootstrap peers from configured indexer addresses so the DHT can
|
||||||
|
// find its routing table entries even in a fresh deployment.
|
||||||
var bootstrapPeers []pp.AddrInfo
|
var bootstrapPeers []pp.AddrInfo
|
||||||
for _, addrStr := range strings.Split(conf.GetConfig().NativeIndexerAddresses+","+conf.GetConfig().IndexerAddresses, ",") {
|
for _, addrStr := range strings.Split(conf.GetConfig().IndexerAddresses, ",") {
|
||||||
addrStr = strings.TrimSpace(addrStr)
|
addrStr = strings.TrimSpace(addrStr)
|
||||||
if addrStr == "" {
|
if addrStr == "" {
|
||||||
continue
|
continue
|
||||||
@@ -75,10 +141,9 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative boo
|
|||||||
}
|
}
|
||||||
dhtOpts := []dht.Option{
|
dhtOpts := []dht.Option{
|
||||||
dht.Mode(dht.ModeServer),
|
dht.Mode(dht.ModeServer),
|
||||||
dht.ProtocolPrefix("oc"), // 🔥 réseau privé
|
dht.ProtocolPrefix("oc"),
|
||||||
dht.Validator(record.NamespacedValidator{
|
dht.Validator(record.NamespacedValidator{
|
||||||
"node": PeerRecordValidator{},
|
"node": PeerRecordValidator{},
|
||||||
"indexer": IndexerRecordValidator{}, // for native indexer registry
|
|
||||||
"name": DefaultValidator{},
|
"name": DefaultValidator{},
|
||||||
"pid": DefaultValidator{},
|
"pid": DefaultValidator{},
|
||||||
}),
|
}),
|
||||||
@@ -91,14 +156,204 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative boo
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitNative must happen after DHT is ready
|
// Make the DHT available for replenishment from other packages.
|
||||||
if isNative {
|
common.SetDiscoveryDHT(ix.DHT)
|
||||||
ix.InitNative()
|
|
||||||
} else {
|
ix.bornAt = time.Now().UTC()
|
||||||
|
ix.offload.inBatch = make(map[pp.ID]time.Time)
|
||||||
|
ix.offload.alreadyTried = make(map[pp.ID]struct{})
|
||||||
ix.initNodeHandler()
|
ix.initNodeHandler()
|
||||||
// Register with configured natives so this indexer appears in their cache.
|
|
||||||
// Pass a fill rate provider so the native can route new nodes to less-loaded indexers.
|
// Build and send a HeartbeatResponse after each received node heartbeat.
|
||||||
if nativeAddrs := conf.GetConfig().NativeIndexerAddresses; nativeAddrs != "" {
|
// Raw metrics only — no pre-cooked score. Node computes the score itself.
|
||||||
|
ix.BuildHeartbeatResponse = func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool) *common.HeartbeatResponse {
|
||||||
|
ix.StreamMU.RLock()
|
||||||
|
peerCount := len(ix.StreamRecords[common.ProtocolHeartbeat])
|
||||||
|
// Collect lastSeen per active peer for challenge responses.
|
||||||
|
type peerMeta struct {
|
||||||
|
found bool
|
||||||
|
lastSeen time.Time
|
||||||
|
}
|
||||||
|
peerLookup := make(map[string]peerMeta, peerCount)
|
||||||
|
var remotePeerRecord PeerRecord
|
||||||
|
for pid, rec := range ix.StreamRecords[common.ProtocolHeartbeat] {
|
||||||
|
var ls time.Time
|
||||||
|
if rec.HeartbeatStream != nil && rec.HeartbeatStream.UptimeTracker != nil {
|
||||||
|
ls = rec.HeartbeatStream.UptimeTracker.LastSeen
|
||||||
|
}
|
||||||
|
peerLookup[pid.String()] = peerMeta{found: true, lastSeen: ls}
|
||||||
|
if pid == remotePeer {
|
||||||
|
remotePeerRecord = rec.Record
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ix.StreamMU.RUnlock()
|
||||||
|
|
||||||
|
// Update referent designation: node marks its best-scored indexer with Referent=true.
|
||||||
|
ix.updateReferent(remotePeer, remotePeerRecord, referent)
|
||||||
|
|
||||||
|
maxN := ix.MaxNodesConn()
|
||||||
|
fillRate := 0.0
|
||||||
|
if maxN > 0 {
|
||||||
|
fillRate = float64(peerCount) / float64(maxN)
|
||||||
|
if fillRate > 1 {
|
||||||
|
fillRate = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := &common.HeartbeatResponse{
|
||||||
|
FillRate: fillRate,
|
||||||
|
PeerCount: peerCount,
|
||||||
|
MaxNodes: maxN,
|
||||||
|
BornAt: ix.bornAt,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Answer each challenged PeerID with raw found + lastSeen.
|
||||||
|
for _, pidStr := range challenges {
|
||||||
|
meta := peerLookup[pidStr] // zero value if not found
|
||||||
|
entry := common.ChallengeEntry{
|
||||||
|
PeerID: pidStr,
|
||||||
|
Found: meta.found,
|
||||||
|
LastSeen: meta.lastSeen,
|
||||||
|
}
|
||||||
|
resp.Challenges = append(resp.Challenges, entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DHT challenge: retrieve the node's own DID to prove DHT is functional.
|
||||||
|
if challengeDID != "" {
|
||||||
|
ctx3, cancel3 := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
|
val, err := ix.DHT.GetValue(ctx3, "/node/"+challengeDID)
|
||||||
|
cancel3()
|
||||||
|
resp.DHTFound = err == nil
|
||||||
|
if err == nil {
|
||||||
|
resp.DHTPayload = json.RawMessage(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random sample of connected nodes as witnesses (up to 3).
|
||||||
|
// Never include the requesting peer itself — asking a node to witness
|
||||||
|
// itself is circular and meaningless.
|
||||||
|
ix.StreamMU.RLock()
|
||||||
|
for pidStr := range peerLookup {
|
||||||
|
if len(resp.Witnesses) >= 3 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
pid, err := pp.Decode(pidStr)
|
||||||
|
if err != nil || pid == remotePeer || pid == ix.Host.ID() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
addrs := ix.Host.Peerstore().Addrs(pid)
|
||||||
|
ai := common.FilterLoopbackAddrs(pp.AddrInfo{ID: pid, Addrs: addrs})
|
||||||
|
if len(ai.Addrs) > 0 {
|
||||||
|
resp.Witnesses = append(resp.Witnesses, ai)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ix.StreamMU.RUnlock()
|
||||||
|
|
||||||
|
// Attach suggestions: exactly `need` entries from the DHT cache.
|
||||||
|
// If the indexer is overloaded (SuggestMigrate will be set below), always
|
||||||
|
// provide at least 1 suggestion even when need == 0, so the node has
|
||||||
|
// somewhere to go.
|
||||||
|
suggestionsNeeded := need
|
||||||
|
if fillRate > offloadThreshold && suggestionsNeeded < 1 {
|
||||||
|
suggestionsNeeded = 1
|
||||||
|
}
|
||||||
|
if suggestionsNeeded > 0 {
|
||||||
|
ix.dhtCacheMu.RLock()
|
||||||
|
// When offloading, pick from a random offset within the top N of the
|
||||||
|
// cache so concurrent migrations spread across multiple targets rather
|
||||||
|
// than all rushing to the same least-loaded indexer (thundering herd).
|
||||||
|
// For normal need-based suggestions the full sorted order is fine.
|
||||||
|
cache := ix.dhtCache
|
||||||
|
if fillRate > offloadThreshold && len(cache) > suggestionsNeeded {
|
||||||
|
const spreadWindow = 5 // sample from the top-5 least-loaded
|
||||||
|
window := spreadWindow
|
||||||
|
if window > len(cache) {
|
||||||
|
window = len(cache)
|
||||||
|
}
|
||||||
|
start := rand.Intn(window)
|
||||||
|
cache = cache[start:]
|
||||||
|
}
|
||||||
|
for _, e := range cache {
|
||||||
|
if len(resp.Suggestions) >= suggestionsNeeded {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Never suggest the requesting peer itself or this indexer.
|
||||||
|
if e.AI.ID == remotePeer || e.AI.ID == h.ID() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
resp.Suggestions = append(resp.Suggestions, e.AI)
|
||||||
|
}
|
||||||
|
ix.dhtCacheMu.RUnlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Offload logic: when fill rate is too high, selectively ask nodes to migrate.
|
||||||
|
if fillRate > offloadThreshold && len(resp.Suggestions) > 0 {
|
||||||
|
now := time.Now()
|
||||||
|
ix.offload.mu.Lock()
|
||||||
|
// Expire stale batch entries -> move to alreadyTried.
|
||||||
|
for pid, addedAt := range ix.offload.inBatch {
|
||||||
|
if now.Sub(addedAt) > offloadGracePeriod {
|
||||||
|
ix.offload.alreadyTried[pid] = struct{}{}
|
||||||
|
delete(ix.offload.inBatch, pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Reset alreadyTried if we've exhausted the whole pool.
|
||||||
|
if len(ix.offload.alreadyTried) >= peerCount {
|
||||||
|
ix.offload.alreadyTried = make(map[pp.ID]struct{})
|
||||||
|
}
|
||||||
|
_, tried := ix.offload.alreadyTried[remotePeer]
|
||||||
|
_, inBatch := ix.offload.inBatch[remotePeer]
|
||||||
|
if !tried {
|
||||||
|
if inBatch {
|
||||||
|
resp.SuggestMigrate = true
|
||||||
|
} else if len(ix.offload.inBatch) < offloadBatchSize {
|
||||||
|
ix.offload.inBatch[remotePeer] = now
|
||||||
|
resp.SuggestMigrate = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ix.offload.mu.Unlock()
|
||||||
|
} else if fillRate <= offloadThreshold {
|
||||||
|
// Fill rate back to normal: reset offload state.
|
||||||
|
ix.offload.mu.Lock()
|
||||||
|
if len(ix.offload.inBatch) > 0 || len(ix.offload.alreadyTried) > 0 {
|
||||||
|
ix.offload.inBatch = make(map[pp.ID]time.Time)
|
||||||
|
ix.offload.alreadyTried = make(map[pp.ID]struct{})
|
||||||
|
}
|
||||||
|
ix.offload.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bootstrap: if this indexer has no indexers of its own, probe the
|
||||||
|
// connecting peer to check it supports ProtocolHeartbeat (i.e. it is
|
||||||
|
// itself an indexer). Plain nodes do not register the handler and the
|
||||||
|
// negotiation fails instantly — no wasted heartbeat cycle.
|
||||||
|
// Run in a goroutine: the probe is a short blocking stream open.
|
||||||
|
if len(common.Indexers.GetAddrs()) == 0 && remotePeer != h.ID() {
|
||||||
|
pid := remotePeer
|
||||||
|
go func() {
|
||||||
|
if !common.SupportsHeartbeat(h, pid) {
|
||||||
|
logger.Debug().Str("peer", pid.String()).
|
||||||
|
Msg("[bootstrap] inbound peer has no heartbeat handler — not an indexer, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
addrs := h.Peerstore().Addrs(pid)
|
||||||
|
ai := common.FilterLoopbackAddrs(pp.AddrInfo{ID: pid, Addrs: addrs})
|
||||||
|
if len(ai.Addrs) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
key := pid.String()
|
||||||
|
if !common.Indexers.ExistsAddr(key) {
|
||||||
|
adCopy := ai
|
||||||
|
common.Indexers.SetAddr(key, &adCopy)
|
||||||
|
common.Indexers.NudgeIt()
|
||||||
|
logger.Info().Str("peer", key).Msg("[bootstrap] no indexers — added inbound indexer peer as candidate")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advertise this indexer in the DHT so nodes can discover it.
|
||||||
fillRateFn := func() float64 {
|
fillRateFn := func() float64 {
|
||||||
ix.StreamMU.RLock()
|
ix.StreamMU.RLock()
|
||||||
n := len(ix.StreamRecords[common.ProtocolHeartbeat])
|
n := len(ix.StreamRecords[common.ProtocolHeartbeat])
|
||||||
@@ -113,22 +368,124 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative boo
|
|||||||
}
|
}
|
||||||
return rate
|
return rate
|
||||||
}
|
}
|
||||||
common.StartNativeRegistration(ix.Host, nativeAddrs, fillRateFn)
|
ix.startDHTCacheRefresh()
|
||||||
}
|
ix.startDHTProvide(fillRateFn)
|
||||||
}
|
|
||||||
return ix
|
return ix
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// startDHTCacheRefresh periodically queries the DHT for peer indexers and
|
||||||
|
// refreshes ix.dhtCache. This passive cache is used by BuildHeartbeatResponse
|
||||||
|
// to suggest better indexers to connected nodes without any per-request cost.
|
||||||
|
func (ix *IndexerService) startDHTCacheRefresh() {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
// Store cancel alongside the provide cancel so Close() stops both.
|
||||||
|
prevCancel := ix.dhtProvideCancel
|
||||||
|
ix.dhtProvideCancel = func() {
|
||||||
|
if prevCancel != nil {
|
||||||
|
prevCancel()
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
refresh := func() {
|
||||||
|
if ix.DHT == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Fetch more than needed so SelectByFillRate can filter for diversity.
|
||||||
|
raw := common.DiscoverIndexersFromDHT(ix.Host, ix.DHT, 30)
|
||||||
|
if len(raw) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Remove self before selection.
|
||||||
|
filtered := raw[:0]
|
||||||
|
for _, ai := range raw {
|
||||||
|
if ai.ID != ix.Host.ID() {
|
||||||
|
filtered = append(filtered, ai)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// SelectByFillRate applies /24 subnet diversity and fill-rate weighting.
|
||||||
|
// Fill rates are unknown at this stage (nil map) so all peers get
|
||||||
|
// the neutral prior f=0.5 — diversity filtering still applies.
|
||||||
|
selected := common.SelectByFillRate(filtered, nil, 10)
|
||||||
|
now := time.Now()
|
||||||
|
ix.dhtCacheMu.Lock()
|
||||||
|
ix.dhtCache = ix.dhtCache[:0]
|
||||||
|
for _, ai := range selected {
|
||||||
|
ix.dhtCache = append(ix.dhtCache, dhtCacheEntry{AI: ai, LastSeen: now})
|
||||||
|
}
|
||||||
|
ix.dhtCacheMu.Unlock()
|
||||||
|
logger.Info().Int("cached", len(selected)).Msg("[dht] indexer suggestion cache refreshed")
|
||||||
|
}
|
||||||
|
// Initial delay: let the DHT routing table warm up first.
|
||||||
|
select {
|
||||||
|
case <-time.After(30 * time.Second):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
refresh()
|
||||||
|
t := time.NewTicker(2 * time.Minute)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-t.C:
|
||||||
|
refresh()
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// startDHTProvide bootstraps the DHT and starts a goroutine that periodically
|
||||||
|
// advertises this indexer under the well-known provider key.
|
||||||
|
func (ix *IndexerService) startDHTProvide(fillRateFn func() float64) {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
ix.dhtProvideCancel = cancel
|
||||||
|
go func() {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
// Wait until a routable (non-loopback) address is available.
|
||||||
|
for i := 0; i < 12; i++ {
|
||||||
|
addrs := ix.Host.Addrs()
|
||||||
|
if len(addrs) > 0 && !strings.Contains(addrs[len(addrs)-1].String(), "127.0.0.1") {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := ix.DHT.Bootstrap(ctx); err != nil {
|
||||||
|
logger.Warn().Err(err).Msg("[dht] bootstrap failed")
|
||||||
|
}
|
||||||
|
provide := func() {
|
||||||
|
pCtx, pCancel := context.WithTimeout(ctx, 30*time.Second)
|
||||||
|
defer pCancel()
|
||||||
|
if err := ix.DHT.Provide(pCtx, common.IndexerCID(), true); err != nil {
|
||||||
|
logger.Warn().Err(err).Msg("[dht] Provide failed")
|
||||||
|
} else {
|
||||||
|
logger.Info().Float64("fill_rate", fillRateFn()).Msg("[dht] indexer advertised in DHT")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
provide()
|
||||||
|
t := time.NewTicker(common.RecommendedHeartbeatInterval)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-t.C:
|
||||||
|
provide()
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
func (ix *IndexerService) Close() {
|
func (ix *IndexerService) Close() {
|
||||||
if ix.Native != nil && ix.Native.cancel != nil {
|
if ix.dhtProvideCancel != nil {
|
||||||
ix.Native.cancel()
|
ix.dhtProvideCancel()
|
||||||
}
|
|
||||||
// Explicitly deregister from natives on clean shutdown so they evict this
|
|
||||||
// indexer immediately rather than waiting for TTL expiry (~90 s).
|
|
||||||
if !ix.IsNative {
|
|
||||||
if nativeAddrs := conf.GetConfig().NativeIndexerAddresses; nativeAddrs != "" {
|
|
||||||
common.UnregisterFromNative(ix.Host, nativeAddrs)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ix.DHT.Close()
|
ix.DHT.Close()
|
||||||
ix.PS.UnregisterTopicValidator(common.TopicPubSubSearch)
|
ix.PS.UnregisterTopicValidator(common.TopicPubSubSearch)
|
||||||
|
|||||||
@@ -73,14 +73,12 @@ func ListenNATS(n *Node) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// Non-partner: close any existing streams for this peer.
|
||||||
|
if p.Relation != peer.PARTNER {
|
||||||
n.StreamService.Mu.Lock()
|
n.StreamService.Mu.Lock()
|
||||||
defer n.StreamService.Mu.Unlock()
|
defer n.StreamService.Mu.Unlock()
|
||||||
|
|
||||||
if p.Relation == peer.PARTNER {
|
|
||||||
n.StreamService.ConnectToPartner(p.StreamAddress)
|
|
||||||
} else {
|
|
||||||
ps := common.ProtocolStream{}
|
ps := common.ProtocolStream{}
|
||||||
for p, s := range n.StreamService.Streams {
|
for proto, s := range n.StreamService.Streams {
|
||||||
m := map[pp.ID]*common.Stream{}
|
m := map[pp.ID]*common.Stream{}
|
||||||
for k := range s {
|
for k := range s {
|
||||||
if ad.ID != k {
|
if ad.ID != k {
|
||||||
@@ -89,7 +87,7 @@ func ListenNATS(n *Node) {
|
|||||||
s[k].Stream.Close()
|
s[k].Stream.Close()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ps[p] = m
|
ps[proto] = m
|
||||||
}
|
}
|
||||||
n.StreamService.Streams = ps
|
n.StreamService.Streams = ps
|
||||||
}
|
}
|
||||||
@@ -167,7 +165,7 @@ func ListenNATS(n *Node) {
|
|||||||
if m["peer_id"] == nil { // send to every active stream
|
if m["peer_id"] == nil { // send to every active stream
|
||||||
n.StreamService.Mu.Lock()
|
n.StreamService.Mu.Lock()
|
||||||
if n.StreamService.Streams[stream.ProtocolSendPlanner] != nil {
|
if n.StreamService.Streams[stream.ProtocolSendPlanner] != nil {
|
||||||
for pid := range n.StreamService.Streams[stream.ProtocolSendPlanner] {
|
for pid := range n.StreamService.Streams[stream.ProtocolSendPlanner] { // send Planner can be long lived - it's a conn
|
||||||
n.StreamService.PublishCommon(nil, resp.User, pid.String(), stream.ProtocolSendPlanner, b)
|
n.StreamService.PublishCommon(nil, resp.User, pid.String(), stream.ProtocolSendPlanner, b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -192,18 +190,18 @@ func ListenNATS(n *Node) {
|
|||||||
if propalgation.DataType == int(tools.PEER) {
|
if propalgation.DataType == int(tools.PEER) {
|
||||||
m := map[string]interface{}{}
|
m := map[string]interface{}{}
|
||||||
if err := json.Unmarshal(propalgation.Payload, &m); err == nil {
|
if err := json.Unmarshal(propalgation.Payload, &m); err == nil {
|
||||||
if peers, err := n.GetPeerRecord(context.Background(), fmt.Sprintf("%v", m["search"]), true); err == nil {
|
needle := fmt.Sprintf("%v", m["search"])
|
||||||
for _, p := range peers {
|
userKey := resp.User
|
||||||
if b, err := json.Marshal(p); err == nil {
|
go n.SearchPeerRecord(userKey, needle, func(hit common.SearchHit) {
|
||||||
go tools.NewNATSCaller().SetNATSPub(tools.SEARCH_EVENT, tools.NATSResponse{
|
if b, err := json.Marshal(hit); err == nil {
|
||||||
|
tools.NewNATSCaller().SetNATSPub(tools.SEARCH_EVENT, tools.NATSResponse{
|
||||||
FromApp: "oc-discovery",
|
FromApp: "oc-discovery",
|
||||||
Datatype: tools.DataType(tools.PEER),
|
Datatype: tools.DataType(tools.PEER),
|
||||||
Method: int(tools.SEARCH_EVENT),
|
Method: int(tools.SEARCH_EVENT),
|
||||||
Payload: b,
|
Payload: b,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
})
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -21,10 +21,18 @@ import (
|
|||||||
"github.com/libp2p/go-libp2p"
|
"github.com/libp2p/go-libp2p"
|
||||||
pubsubs "github.com/libp2p/go-libp2p-pubsub"
|
pubsubs "github.com/libp2p/go-libp2p-pubsub"
|
||||||
"github.com/libp2p/go-libp2p/core/crypto"
|
"github.com/libp2p/go-libp2p/core/crypto"
|
||||||
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
"github.com/libp2p/go-libp2p/core/protocol"
|
"github.com/libp2p/go-libp2p/core/protocol"
|
||||||
|
"github.com/libp2p/go-libp2p/p2p/security/noise"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// activeSearch tracks an in-flight distributed peer search for one user.
|
||||||
|
type activeSearch struct {
|
||||||
|
queryID string
|
||||||
|
cancel context.CancelFunc
|
||||||
|
}
|
||||||
|
|
||||||
type Node struct {
|
type Node struct {
|
||||||
*common.LongLivedStreamRecordedService[interface{}] // change type of stream
|
*common.LongLivedStreamRecordedService[interface{}] // change type of stream
|
||||||
PS *pubsubs.PubSub
|
PS *pubsubs.PubSub
|
||||||
@@ -35,10 +43,14 @@ type Node struct {
|
|||||||
isIndexer bool
|
isIndexer bool
|
||||||
peerRecord *indexer.PeerRecord
|
peerRecord *indexer.PeerRecord
|
||||||
|
|
||||||
|
// activeSearches: one streaming search per user; new search cancels previous.
|
||||||
|
activeSearchesMu sync.Mutex
|
||||||
|
activeSearches map[string]*activeSearch
|
||||||
|
|
||||||
Mu sync.RWMutex
|
Mu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func InitNode(isNode bool, isIndexer bool, isNativeIndexer bool) (*Node, error) {
|
func InitNode(isNode bool, isIndexer bool) (*Node, error) {
|
||||||
if !isNode && !isIndexer {
|
if !isNode && !isIndexer {
|
||||||
return nil, errors.New("wait... what ? your node need to at least something. Retry we can't be friend in that case")
|
return nil, errors.New("wait... what ? your node need to at least something. Retry we can't be friend in that case")
|
||||||
}
|
}
|
||||||
@@ -54,13 +66,17 @@ func InitNode(isNode bool, isIndexer bool, isNativeIndexer bool) (*Node, error)
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
logger.Info().Msg("open a host...")
|
logger.Info().Msg("open a host...")
|
||||||
|
gater := newOCConnectionGater(nil) // host set below after creation
|
||||||
h, err := libp2p.New(
|
h, err := libp2p.New(
|
||||||
libp2p.PrivateNetwork(psk),
|
libp2p.PrivateNetwork(psk),
|
||||||
libp2p.Identity(priv),
|
libp2p.Identity(priv),
|
||||||
|
libp2p.Security(noise.ID, noise.New),
|
||||||
libp2p.ListenAddrStrings(
|
libp2p.ListenAddrStrings(
|
||||||
fmt.Sprintf("/ip4/0.0.0.0/tcp/%d", conf.GetConfig().NodeEndpointPort),
|
fmt.Sprintf("/ip4/0.0.0.0/tcp/%d", conf.GetConfig().NodeEndpointPort),
|
||||||
),
|
),
|
||||||
|
libp2p.ConnectionGater(gater),
|
||||||
)
|
)
|
||||||
|
gater.host = h // wire host back into gater now that it exists
|
||||||
logger.Info().Msg("Host open on " + h.ID().String())
|
logger.Info().Msg("Host open on " + h.ID().String())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.New("no host no node")
|
return nil, errors.New("no host no node")
|
||||||
@@ -69,10 +85,15 @@ func InitNode(isNode bool, isIndexer bool, isNativeIndexer bool) (*Node, error)
|
|||||||
PeerID: h.ID(),
|
PeerID: h.ID(),
|
||||||
isIndexer: isIndexer,
|
isIndexer: isIndexer,
|
||||||
LongLivedStreamRecordedService: common.NewStreamRecordedService[interface{}](h, 1000),
|
LongLivedStreamRecordedService: common.NewStreamRecordedService[interface{}](h, 1000),
|
||||||
|
activeSearches: map[string]*activeSearch{},
|
||||||
}
|
}
|
||||||
// Register the bandwidth probe handler so any peer measuring this node's
|
// Register the bandwidth probe handler so any peer measuring this node's
|
||||||
// throughput can open a dedicated probe stream and read the echo.
|
// throughput can open a dedicated probe stream and read the echo.
|
||||||
h.SetStreamHandler(common.ProtocolBandwidthProbe, common.HandleBandwidthProbe)
|
h.SetStreamHandler(common.ProtocolBandwidthProbe, common.HandleBandwidthProbe)
|
||||||
|
// Register the witness query handler so peers can ask this node's view of indexers.
|
||||||
|
h.SetStreamHandler(common.ProtocolWitnessQuery, func(s network.Stream) {
|
||||||
|
common.HandleWitnessQuery(h, s)
|
||||||
|
})
|
||||||
var ps *pubsubs.PubSub
|
var ps *pubsubs.PubSub
|
||||||
if isNode {
|
if isNode {
|
||||||
logger.Info().Msg("generate opencloud node...")
|
logger.Info().Msg("generate opencloud node...")
|
||||||
@@ -104,7 +125,7 @@ func InitNode(isNode bool, isIndexer bool, isNativeIndexer bool) (*Node, error)
|
|||||||
return json.RawMessage(b)
|
return json.RawMessage(b)
|
||||||
}
|
}
|
||||||
logger.Info().Msg("connect to indexers...")
|
logger.Info().Msg("connect to indexers...")
|
||||||
common.ConnectToIndexers(node.Host, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer, node.PeerID, buildRecord)
|
common.ConnectToIndexers(node.Host, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer, buildRecord)
|
||||||
logger.Info().Msg("claims my node...")
|
logger.Info().Msg("claims my node...")
|
||||||
if _, err := node.claimInfo(conf.GetConfig().Name, conf.GetConfig().Hostname); err != nil {
|
if _, err := node.claimInfo(conf.GetConfig().Name, conf.GetConfig().Hostname); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
@@ -137,7 +158,7 @@ func InitNode(isNode bool, isIndexer bool, isNativeIndexer bool) (*Node, error)
|
|||||||
}
|
}
|
||||||
if isIndexer {
|
if isIndexer {
|
||||||
logger.Info().Msg("generate opencloud indexer...")
|
logger.Info().Msg("generate opencloud indexer...")
|
||||||
node.IndexerService = indexer.NewIndexerService(node.Host, ps, 500, isNativeIndexer)
|
node.IndexerService = indexer.NewIndexerService(node.Host, ps, 500)
|
||||||
}
|
}
|
||||||
return node, nil
|
return node, nil
|
||||||
}
|
}
|
||||||
@@ -158,20 +179,14 @@ func (d *Node) publishPeerRecord(
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
common.StreamMuIndexes.RLock()
|
|
||||||
indexerSnapshot := make([]*pp.AddrInfo, 0, len(common.StaticIndexers))
|
|
||||||
for _, ad := range common.StaticIndexers {
|
|
||||||
indexerSnapshot = append(indexerSnapshot, ad)
|
|
||||||
}
|
|
||||||
common.StreamMuIndexes.RUnlock()
|
|
||||||
|
|
||||||
for _, ad := range indexerSnapshot {
|
for _, ad := range common.Indexers.GetAddrs() {
|
||||||
var err error
|
var err error
|
||||||
if common.StreamIndexers, err = common.TempStream(d.Host, *ad, common.ProtocolPublish, "", common.StreamIndexers, map[protocol.ID]*common.ProtocolInfo{},
|
if common.Indexers.Streams, err = common.TempStream(d.Host, *ad.Info, common.ProtocolPublish, "", common.Indexers.Streams, map[protocol.ID]*common.ProtocolInfo{},
|
||||||
&common.StreamMuIndexes); err != nil {
|
&common.Indexers.MuStream); err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
stream := common.StreamIndexers[common.ProtocolPublish][ad.ID]
|
stream := common.Indexers.Streams.GetPerID(common.ProtocolPublish, ad.Info.ID)
|
||||||
base := indexer.PeerRecordPayload{
|
base := indexer.PeerRecordPayload{
|
||||||
Name: rec.Name,
|
Name: rec.Name,
|
||||||
DID: rec.DID,
|
DID: rec.DID,
|
||||||
@@ -188,6 +203,75 @@ func (d *Node) publishPeerRecord(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SearchPeerRecord starts a distributed peer search via ProtocolSearchPeer.
|
||||||
|
// userKey identifies the requesting user — a new call cancels any previous
|
||||||
|
// search for the same user. Results are pushed to onResult as they arrive.
|
||||||
|
// The function returns when the search stream closes (idle timeout or indexer unreachable).
|
||||||
|
func (d *Node) SearchPeerRecord(userKey, needle string, onResult func(common.SearchHit)) {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
|
d.activeSearchesMu.Lock()
|
||||||
|
if prev, ok := d.activeSearches[userKey]; ok {
|
||||||
|
prev.cancel()
|
||||||
|
}
|
||||||
|
queryID := uuid.New().String()
|
||||||
|
d.activeSearches[userKey] = &activeSearch{queryID: queryID, cancel: cancel}
|
||||||
|
d.activeSearchesMu.Unlock()
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
cancel()
|
||||||
|
d.activeSearchesMu.Lock()
|
||||||
|
if cur, ok := d.activeSearches[userKey]; ok && cur.queryID == queryID {
|
||||||
|
delete(d.activeSearches, userKey)
|
||||||
|
}
|
||||||
|
d.activeSearchesMu.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
req := common.SearchPeerRequest{QueryID: queryID}
|
||||||
|
if pid, err := pp.Decode(needle); err == nil {
|
||||||
|
req.PeerID = pid.String()
|
||||||
|
} else if _, err := uuid.Parse(needle); err == nil {
|
||||||
|
req.DID = needle
|
||||||
|
} else {
|
||||||
|
req.Name = needle
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try indexers in pool order until one accepts the stream.
|
||||||
|
for _, ad := range common.Indexers.GetAddrs() {
|
||||||
|
if ad.Info == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
dialCtx, dialCancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
|
s, err := d.Host.NewStream(dialCtx, ad.Info.ID, common.ProtocolSearchPeer)
|
||||||
|
dialCancel()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := json.NewEncoder(s).Encode(req); err != nil {
|
||||||
|
s.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
dec := json.NewDecoder(s)
|
||||||
|
for {
|
||||||
|
var result common.SearchPeerResult
|
||||||
|
if err := dec.Decode(&result); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if result.QueryID != queryID {
|
||||||
|
continue // stale response from a previous query
|
||||||
|
}
|
||||||
|
for _, hit := range result.Records {
|
||||||
|
onResult(hit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
logger.Warn().Str("user", userKey).Msg("[search] no reachable indexer for peer search")
|
||||||
|
}
|
||||||
|
|
||||||
func (d *Node) GetPeerRecord(
|
func (d *Node) GetPeerRecord(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
pidOrdid string,
|
pidOrdid string,
|
||||||
@@ -195,13 +279,6 @@ func (d *Node) GetPeerRecord(
|
|||||||
) ([]*peer.Peer, error) {
|
) ([]*peer.Peer, error) {
|
||||||
var err error
|
var err error
|
||||||
var info map[string]indexer.PeerRecord
|
var info map[string]indexer.PeerRecord
|
||||||
common.StreamMuIndexes.RLock()
|
|
||||||
indexerSnapshot2 := make([]*pp.AddrInfo, 0, len(common.StaticIndexers))
|
|
||||||
for _, ad := range common.StaticIndexers {
|
|
||||||
indexerSnapshot2 = append(indexerSnapshot2, ad)
|
|
||||||
}
|
|
||||||
common.StreamMuIndexes.RUnlock()
|
|
||||||
|
|
||||||
// Build the GetValue request: if pidOrdid is neither a UUID DID nor a libp2p
|
// Build the GetValue request: if pidOrdid is neither a UUID DID nor a libp2p
|
||||||
// PeerID, treat it as a human-readable name and let the indexer resolve it.
|
// PeerID, treat it as a human-readable name and let the indexer resolve it.
|
||||||
getReq := indexer.GetValue{Key: pidOrdid}
|
getReq := indexer.GetValue{Key: pidOrdid}
|
||||||
@@ -213,12 +290,12 @@ func (d *Node) GetPeerRecord(
|
|||||||
getReq.Key = ""
|
getReq.Key = ""
|
||||||
}
|
}
|
||||||
getReq.Search = search
|
getReq.Search = search
|
||||||
for _, ad := range indexerSnapshot2 {
|
for _, ad := range common.Indexers.GetAddrs() {
|
||||||
if common.StreamIndexers, err = common.TempStream(d.Host, *ad, common.ProtocolGet, "",
|
if common.Indexers.Streams, err = common.TempStream(d.Host, *ad.Info, common.ProtocolGet, "",
|
||||||
common.StreamIndexers, map[protocol.ID]*common.ProtocolInfo{}, &common.StreamMuIndexes); err != nil {
|
common.Indexers.Streams, map[protocol.ID]*common.ProtocolInfo{}, &common.Indexers.MuStream); err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
stream := common.StreamIndexers[common.ProtocolGet][ad.ID]
|
stream := common.Indexers.Streams.GetPerID(common.ProtocolGet, ad.Info.ID)
|
||||||
if err := json.NewEncoder(stream.Stream).Encode(getReq); err != nil {
|
if err := json.NewEncoder(stream.Stream).Encode(getReq); err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -94,28 +94,22 @@ func (abs *StreamService) sendPlanner(event *common.Event) error { //
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else { // if not empty so it's
|
||||||
m := map[string]interface{}{}
|
m := map[string]interface{}{}
|
||||||
if err := json.Unmarshal(event.Payload, &m); err == nil {
|
if err := json.Unmarshal(event.Payload, &m); err == nil {
|
||||||
m["peer_id"] = event.From
|
m["peer_id"] = event.From
|
||||||
if pl, err := json.Marshal(m); err == nil {
|
if pl, err := json.Marshal(m); err == nil {
|
||||||
if b, err := json.Marshal(tools.PropalgationMessage{
|
go tools.NewNATSCaller().SetNATSPub(tools.PLANNER_EXECUTION, tools.NATSResponse{
|
||||||
DataType: -1,
|
|
||||||
Action: tools.PB_PLANNER,
|
|
||||||
Payload: pl,
|
|
||||||
}); err == nil {
|
|
||||||
go tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
|
||||||
FromApp: "oc-discovery",
|
FromApp: "oc-discovery",
|
||||||
Datatype: tools.DataType(oclib.BOOKING),
|
Datatype: tools.DataType(oclib.BOOKING),
|
||||||
Method: int(tools.PROPALGATION_EVENT),
|
Method: int(tools.PLANNER_EXECUTION),
|
||||||
Payload: b,
|
Payload: pl,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
|
||||||
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -70,8 +70,7 @@ func (ps *StreamService) ToPartnerPublishEvent(
|
|||||||
if err := json.Unmarshal(payload, &p); err != nil {
|
if err := json.Unmarshal(payload, &p); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
pid, err := pp.Decode(p.PeerID)
|
if _, err := pp.Decode(p.PeerID); err != nil {
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,19 +85,7 @@ func (ps *StreamService) ToPartnerPublishEvent(
|
|||||||
if _, err := ps.PublishCommon(dt, user, p.PeerID, ProtocolUpdateResource, b2); err != nil {
|
if _, err := ps.PublishCommon(dt, user, p.PeerID, ProtocolUpdateResource, b2); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if p.Relation == peer.PARTNER {
|
|
||||||
if ps.Streams[ProtocolHeartbeatPartner] == nil {
|
|
||||||
ps.Streams[ProtocolHeartbeatPartner] = map[pp.ID]*common.Stream{}
|
|
||||||
}
|
|
||||||
fmt.Println("SHOULD CONNECT")
|
|
||||||
ps.ConnectToPartner(p.StreamAddress)
|
|
||||||
} else if ps.Streams[ProtocolHeartbeatPartner] != nil && ps.Streams[ProtocolHeartbeatPartner][pid] != nil {
|
|
||||||
for _, pids := range ps.Streams {
|
|
||||||
if pids[pid] != nil {
|
|
||||||
delete(pids, pid)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
"oc-discovery/conf"
|
"oc-discovery/conf"
|
||||||
"oc-discovery/daemons/node/common"
|
"oc-discovery/daemons/node/common"
|
||||||
@@ -21,7 +20,6 @@ import (
|
|||||||
"github.com/libp2p/go-libp2p/core/network"
|
"github.com/libp2p/go-libp2p/core/network"
|
||||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
"github.com/libp2p/go-libp2p/core/protocol"
|
"github.com/libp2p/go-libp2p/core/protocol"
|
||||||
ma "github.com/multiformats/go-multiaddr"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const ProtocolConsidersResource = "/opencloud/resource/considers/1.0"
|
const ProtocolConsidersResource = "/opencloud/resource/considers/1.0"
|
||||||
@@ -59,7 +57,6 @@ type StreamService struct {
|
|||||||
Streams common.ProtocolStream
|
Streams common.ProtocolStream
|
||||||
maxNodesConn int
|
maxNodesConn int
|
||||||
Mu sync.RWMutex
|
Mu sync.RWMutex
|
||||||
// Stream map[protocol.ID]map[pp.ID]*daemons.Stream
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func InitStream(ctx context.Context, h host.Host, key pp.ID, maxNode int, node common.DiscoveryPeer) (*StreamService, error) {
|
func InitStream(ctx context.Context, h host.Host, key pp.ID, maxNode int, node common.DiscoveryPeer) (*StreamService, error) {
|
||||||
@@ -71,8 +68,6 @@ func InitStream(ctx context.Context, h host.Host, key pp.ID, maxNode int, node c
|
|||||||
Streams: common.ProtocolStream{},
|
Streams: common.ProtocolStream{},
|
||||||
maxNodesConn: maxNode,
|
maxNodesConn: maxNode,
|
||||||
}
|
}
|
||||||
logger.Info().Msg("handle to partner heartbeat protocol...")
|
|
||||||
service.Host.SetStreamHandler(ProtocolHeartbeatPartner, service.HandlePartnerHeartbeat)
|
|
||||||
for proto := range protocols {
|
for proto := range protocols {
|
||||||
service.Host.SetStreamHandler(proto, service.HandleResponse)
|
service.Host.SetStreamHandler(proto, service.HandleResponse)
|
||||||
}
|
}
|
||||||
@@ -106,39 +101,11 @@ func (s *StreamService) HandleResponse(stream network.Stream) {
|
|||||||
stream.Protocol(), protocols[stream.Protocol()])
|
stream.Protocol(), protocols[stream.Protocol()])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *StreamService) HandlePartnerHeartbeat(stream network.Stream) {
|
|
||||||
s.Mu.Lock()
|
|
||||||
if s.Streams[ProtocolHeartbeatPartner] == nil {
|
|
||||||
s.Streams[ProtocolHeartbeatPartner] = map[pp.ID]*common.Stream{}
|
|
||||||
}
|
|
||||||
streams := s.Streams[ProtocolHeartbeatPartner]
|
|
||||||
streamsAnonym := map[pp.ID]common.HeartBeatStreamed{}
|
|
||||||
for k, v := range streams {
|
|
||||||
streamsAnonym[k] = v
|
|
||||||
}
|
|
||||||
s.Mu.Unlock()
|
|
||||||
pid, hb, err := common.CheckHeartbeat(s.Host, stream, json.NewDecoder(stream), streamsAnonym, &s.Mu, s.maxNodesConn)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
s.Mu.Lock()
|
|
||||||
defer s.Mu.Unlock()
|
|
||||||
// if record already seen update last seen
|
|
||||||
if rec, ok := streams[*pid]; ok {
|
|
||||||
rec.DID = hb.DID
|
|
||||||
rec.Expiry = time.Now().UTC().Add(10 * time.Second)
|
|
||||||
} else { // if not in stream ?
|
|
||||||
val, err := stream.Conn().RemoteMultiaddr().ValueForProtocol(ma.P_IP4)
|
|
||||||
if err == nil {
|
|
||||||
s.ConnectToPartner(val)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// GC is already running via InitStream — starting a new ticker goroutine on
|
|
||||||
// every heartbeat would leak an unbounded number of goroutines.
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StreamService) connectToPartners() error {
|
func (s *StreamService) connectToPartners() error {
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
|
// Register handlers for partner resource protocols (create/update/delete).
|
||||||
|
// Connections to partners happen on-demand via TempStream when needed.
|
||||||
for proto, info := range protocolsPartners {
|
for proto, info := range protocolsPartners {
|
||||||
f := func(ss network.Stream) {
|
f := func(ss network.Stream) {
|
||||||
if s.Streams[proto] == nil {
|
if s.Streams[proto] == nil {
|
||||||
@@ -153,25 +120,9 @@ func (s *StreamService) connectToPartners() error {
|
|||||||
logger.Info().Msg("SetStreamHandler " + string(proto))
|
logger.Info().Msg("SetStreamHandler " + string(proto))
|
||||||
s.Host.SetStreamHandler(proto, f)
|
s.Host.SetStreamHandler(proto, f)
|
||||||
}
|
}
|
||||||
peers, err := s.searchPeer(fmt.Sprintf("%v", peer.PARTNER.EnumIndex()))
|
|
||||||
if err != nil {
|
|
||||||
logger.Err(err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, p := range peers {
|
|
||||||
s.ConnectToPartner(p.StreamAddress)
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *StreamService) ConnectToPartner(address string) {
|
|
||||||
logger := oclib.GetLogger()
|
|
||||||
if ad, err := pp.AddrInfoFromString(address); err == nil {
|
|
||||||
logger.Info().Msg("Connect to Partner " + ProtocolHeartbeatPartner + " " + address)
|
|
||||||
common.SendHeartbeat(context.Background(), ProtocolHeartbeatPartner, conf.GetConfig().Name,
|
|
||||||
s.Host, s.Streams, map[string]*pp.AddrInfo{address: ad}, nil, 20*time.Second)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StreamService) searchPeer(search string) ([]*peer.Peer, error) {
|
func (s *StreamService) searchPeer(search string) ([]*peer.Peer, error) {
|
||||||
ps := []*peer.Peer{}
|
ps := []*peer.Peer{}
|
||||||
@@ -220,11 +171,7 @@ func (s *StreamService) gc() {
|
|||||||
defer s.Mu.Unlock()
|
defer s.Mu.Unlock()
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
|
|
||||||
if s.Streams[ProtocolHeartbeatPartner] == nil {
|
for pid, rec := range s.Streams[ProtocolHeartbeatPartner] {
|
||||||
s.Streams[ProtocolHeartbeatPartner] = map[pp.ID]*common.Stream{}
|
|
||||||
}
|
|
||||||
streams := s.Streams[ProtocolHeartbeatPartner]
|
|
||||||
for pid, rec := range streams {
|
|
||||||
if now.After(rec.Expiry) {
|
if now.After(rec.Expiry) {
|
||||||
for _, sstreams := range s.Streams {
|
for _, sstreams := range s.Streams {
|
||||||
if sstreams[pid] != nil {
|
if sstreams[pid] != nil {
|
||||||
|
|||||||
@@ -1,10 +0,0 @@
|
|||||||
{
|
|
||||||
"MONGO_URL":"mongodb://mongo:27017/",
|
|
||||||
"MONGO_DATABASE":"DC_myDC",
|
|
||||||
"NATS_URL": "nats://nats:4222",
|
|
||||||
"NODE_MODE": "node",
|
|
||||||
"NODE_ENDPOINT_PORT": 4010,
|
|
||||||
"NATIVE_INDEXER_ADDRESSES": "/ip4/172.40.0.5/tcp/4005/p2p/12D3KooWGn3j4XqTSrjJDGGpTQERdDV5TPZdhQp87rAUnvQssvQu",
|
|
||||||
"MIN_INDEXER": 2,
|
|
||||||
"PEER_IDS": "/ip4/172.40.0.9/tcp/4009/p2p/12D3KooWGnQfKwX9E4umCPE8dUKZuig4vw5BndDowRLEbGmcZyta"
|
|
||||||
}
|
|
||||||
@@ -4,6 +4,5 @@
|
|||||||
"NATS_URL": "nats://nats:4222",
|
"NATS_URL": "nats://nats:4222",
|
||||||
"NODE_MODE": "node",
|
"NODE_MODE": "node",
|
||||||
"NODE_ENDPOINT_PORT": 4004,
|
"NODE_ENDPOINT_PORT": 4004,
|
||||||
"INDEXER_ADDRESSES": "/ip4/172.40.0.1/tcp/4001/p2p/12D3KooWGn3j4XqTSrjJDGGpTQERdDV5TPZdhQp87rAUnvQssvQu",
|
"INDEXER_ADDRESSES": "/ip4/172.40.0.1/tcp/4001/p2p/12D3KooWGn3j4XqTSrjJDGGpTQERdDV5TPZdhQp87rAUnvQssvQu"
|
||||||
"PEER_IDS": "/ip4/172.40.0.3/tcp/4003/p2p/12D3KooWBh9kZrekBAE5G33q4jCLNRAzygem3gP1mMdK8mhoCTaw"
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"MONGO_URL":"mongodb://mongo:27017/",
|
|
||||||
"MONGO_DATABASE":"DC_myDC",
|
|
||||||
"NATS_URL": "nats://nats:4222",
|
|
||||||
"NODE_MODE": "native-indexer",
|
|
||||||
"NODE_ENDPOINT_PORT": 4005
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
{
|
|
||||||
"MONGO_URL":"mongodb://mongo:27017/",
|
|
||||||
"MONGO_DATABASE":"DC_myDC",
|
|
||||||
"NATS_URL": "nats://nats:4222",
|
|
||||||
"NODE_MODE": "native-indexer",
|
|
||||||
"NODE_ENDPOINT_PORT": 4006,
|
|
||||||
"NATIVE_INDEXER_ADDRESSES": "/ip4/172.40.0.5/tcp/4005/p2p/12D3KooWGn3j4XqTSrjJDGGpTQERdDV5TPZdhQp87rAUnvQssvQu"
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
{
|
|
||||||
"MONGO_URL":"mongodb://mongo:27017/",
|
|
||||||
"MONGO_DATABASE":"DC_myDC",
|
|
||||||
"NATS_URL": "nats://nats:4222",
|
|
||||||
"NODE_MODE": "indexer",
|
|
||||||
"NODE_ENDPOINT_PORT": 4007,
|
|
||||||
"NATIVE_INDEXER_ADDRESSES": "/ip4/172.40.0.6/tcp/4006/p2p/12D3KooWC3GNStak8KCYtJq11Dxiq45EJV53z1ZvKetMcZBeBX6u"
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
{
|
|
||||||
"MONGO_URL":"mongodb://mongo:27017/",
|
|
||||||
"MONGO_DATABASE":"DC_myDC",
|
|
||||||
"NATS_URL": "nats://nats:4222",
|
|
||||||
"NODE_MODE": "indexer",
|
|
||||||
"NODE_ENDPOINT_PORT": 4008,
|
|
||||||
"NATIVE_INDEXER_ADDRESSES": "/ip4/172.40.0.5/tcp/4005/p2p/12D3KooWGn3j4XqTSrjJDGGpTQERdDV5TPZdhQp87rAUnvQssvQu"
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
{
|
|
||||||
"MONGO_URL":"mongodb://mongo:27017/",
|
|
||||||
"MONGO_DATABASE":"DC_myDC",
|
|
||||||
"NATS_URL": "nats://nats:4222",
|
|
||||||
"NODE_MODE": "node",
|
|
||||||
"NODE_ENDPOINT_PORT": 4009,
|
|
||||||
"NATIVE_INDEXER_ADDRESSES": "/ip4/172.40.0.6/tcp/4006/p2p/12D3KooWC3GNStak8KCYtJq11Dxiq45EJV53z1ZvKetMcZBeBX6u,/ip4/172.40.0.5/tcp/4005/p2p/12D3KooWGn3j4XqTSrjJDGGpTQERdDV5TPZdhQp87rAUnvQssvQu"
|
|
||||||
}
|
|
||||||
@@ -4,55 +4,61 @@ title Node Initialization — Peer A (InitNode)
|
|||||||
participant "main (Peer A)" as MainA
|
participant "main (Peer A)" as MainA
|
||||||
participant "Node A" as NodeA
|
participant "Node A" as NodeA
|
||||||
participant "libp2p (Peer A)" as libp2pA
|
participant "libp2p (Peer A)" as libp2pA
|
||||||
|
participant "ConnectionGater A" as GaterA
|
||||||
participant "DB Peer A (oc-lib)" as DBA
|
participant "DB Peer A (oc-lib)" as DBA
|
||||||
participant "NATS A" as NATSA
|
participant "NATS A" as NATSA
|
||||||
participant "Indexer (partagé)" as IndexerA
|
participant "Indexer (shared)" as IndexerA
|
||||||
|
participant "DHT A" as DHTA
|
||||||
participant "StreamService A" as StreamA
|
participant "StreamService A" as StreamA
|
||||||
participant "PubSubService A" as PubSubA
|
participant "PubSubService A" as PubSubA
|
||||||
|
|
||||||
MainA -> NodeA: InitNode(isNode, isIndexer, isNativeIndexer)
|
MainA -> NodeA: InitNode(isNode=true, isIndexer=false)
|
||||||
|
|
||||||
NodeA -> NodeA: LoadKeyFromFilePrivate() → priv
|
NodeA -> NodeA: LoadKeyFromFilePrivate() → priv
|
||||||
NodeA -> NodeA: LoadPSKFromFile() → psk
|
NodeA -> NodeA: LoadPSKFromFile() → psk
|
||||||
|
|
||||||
NodeA -> libp2pA: New(PrivateNetwork(psk), Identity(priv), ListenAddr:4001)
|
NodeA -> GaterA: newOCConnectionGater(nil)
|
||||||
|
NodeA -> libp2pA: New(\n PrivateNetwork(psk),\n Identity(priv),\n ListenAddr: tcp/4001,\n ConnectionGater(gater)\n)
|
||||||
libp2pA --> NodeA: host A (PeerID_A)
|
libp2pA --> NodeA: host A (PeerID_A)
|
||||||
|
NodeA -> GaterA: gater.host = host A
|
||||||
|
|
||||||
note over NodeA: isNode == true
|
note over GaterA: InterceptSecured (inbound):\n1. DB lookup by peer_id\n → BLACKLIST : refuse\n → found : accept\n2. Not found → DHT sequential check\n (transport-error fallthrough only)
|
||||||
|
|
||||||
NodeA -> libp2pA: NewGossipSub(ctx, host)
|
NodeA -> libp2pA: SetStreamHandler(/opencloud/probe/1.0, HandleBandwidthProbe)
|
||||||
libp2pA --> NodeA: ps (GossipSub)
|
NodeA -> libp2pA: SetStreamHandler(/opencloud/witness/1.0, HandleWitnessQuery)
|
||||||
|
|
||||||
NodeA -> IndexerA: ConnectToIndexers → SendHeartbeat /opencloud/heartbeat/1.0
|
NodeA -> libp2pA: NewGossipSub(ctx, host) → ps (GossipSub)
|
||||||
note over IndexerA: Heartbeat long-lived established\nQuality Score evaluated (bw + uptime + diversity)
|
|
||||||
IndexerA --> NodeA: OK
|
NodeA -> NodeA: buildRecord() closure\n→ signs fresh PeerRecord (expiry=now+2min)\n embedded in each heartbeat tick
|
||||||
|
|
||||||
|
NodeA -> IndexerA: ConnectToIndexers(host, minIndexer=1, maxIndexer=5, buildRecord)
|
||||||
|
note over IndexerA: Reads IndexerAddresses from config\nAdds seeds → Indexers Directory (IsSeed=true)\nLaunches SendHeartbeat goroutine (20s ticker)
|
||||||
|
|
||||||
|
IndexerA -> DHTA: proactive DHT discovery (after 5s warmup)\ninitNodeDHT(h, seeds)\nDiscoverIndexersFromDHT → SelectByFillRate\n→ add to Indexers Directory + NudgeIt()
|
||||||
|
|
||||||
NodeA -> NodeA: claimInfo(name, hostname)
|
NodeA -> NodeA: claimInfo(name, hostname)
|
||||||
NodeA -> IndexerA: TempStream /opencloud/record/publish/1.0
|
NodeA -> IndexerA: TempStream /opencloud/record/publish/1.0
|
||||||
NodeA -> IndexerA: stream.Encode(PeerRecord A signé)
|
NodeA -> IndexerA: stream.Encode(Signed PeerRecord A)
|
||||||
IndexerA -> IndexerA: DHT.PutValue("/node/"+DID_A, record)
|
IndexerA -> DHTA: PutValue("/node/"+DID_A, record)
|
||||||
|
|
||||||
NodeA -> DBA: DB(PEER).Search(SELF)
|
NodeA -> NodeA: StartGC(30s)
|
||||||
DBA --> NodeA: local peer A (or new generated UUID)
|
|
||||||
|
|
||||||
NodeA -> NodeA: StartGC(30s) — GarbageCollector on StreamRecords
|
|
||||||
|
|
||||||
NodeA -> StreamA: InitStream(ctx, host, PeerID_A, 1000, nodeA)
|
NodeA -> StreamA: InitStream(ctx, host, PeerID_A, 1000, nodeA)
|
||||||
StreamA -> StreamA: SetStreamHandler(heartbeat/partner, search, planner, ...)
|
StreamA -> StreamA: SetStreamHandler(resource/search, create, update,\n delete, planner, verify, considers)
|
||||||
StreamA -> DBA: Search(PEER, PARTNER) → partner list
|
|
||||||
DBA --> StreamA: Heartbeat long-lived established to partners
|
|
||||||
StreamA --> NodeA: StreamService A
|
StreamA --> NodeA: StreamService A
|
||||||
|
|
||||||
NodeA -> PubSubA: InitPubSub(ctx, host, ps, nodeA, streamA)
|
NodeA -> PubSubA: InitPubSub(ctx, host, ps, nodeA, streamA)
|
||||||
PubSubA -> PubSubA: subscribeEvents(PB_SEARCH, timeout=-1)
|
PubSubA -> PubSubA: subscribeEvents(PB_SEARCH, timeout=-1)
|
||||||
PubSubA --> NodeA: PubSubService A
|
PubSubA --> NodeA: PubSubService A
|
||||||
|
|
||||||
NodeA -> NodeA: SubscribeToSearch(ps, callback) (search global topic for resources)
|
NodeA -> NodeA: SubscribeToSearch(ps, callback)
|
||||||
note over NodeA: callback: GetPeerRecord(evt.From)\n→ StreamService.SendResponse
|
note over NodeA: callback: if evt.From != self\n → GetPeerRecord(evt.From)\n → StreamService.SendResponse
|
||||||
|
|
||||||
NodeA -> NATSA: ListenNATS(nodeA)
|
NodeA -> NATSA: ListenNATS(nodeA)
|
||||||
note over NATSA: Subscribes handlers:\nCREATE_RESOURCE, PROPALGATION_EVENT
|
note over NATSA: Subscribes:\nCREATE_RESOURCE → partner on-demand\nPROPALGATION_EVENT → resource propagation
|
||||||
|
|
||||||
NodeA --> MainA: *Node A is ready
|
NodeA --> MainA: *Node A is ready
|
||||||
|
|
||||||
|
note over NodeA,IndexerA: SendHeartbeat goroutine (permanent, 20s ticker):\nNode → Indexer : Heartbeat{name, PeerID, indexersBinded, need, challenges?, record}\nIndexer → Node : HeartbeatResponse{fillRate, challenges, suggestions, witnesses, suggestMigrate}\nScore updated (7 dimensions), pool managed autonomously
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
|
|||||||
@@ -1,49 +1,59 @@
|
|||||||
@startuml indexer_heartbeat
|
@startuml indexer_heartbeat
|
||||||
title Indexer — Heartbeat node → indexer (score on 5 metrics)
|
title Heartbeat bidirectionnel node → indexeur (scoring 7 dimensions + challenges)
|
||||||
|
|
||||||
participant "Node A" as NodeA
|
participant "Node A" as NodeA
|
||||||
participant "Node B" as NodeB
|
participant "Node B" as NodeB
|
||||||
participant "IndexerService" as Indexer
|
participant "IndexerService" as Indexer
|
||||||
|
|
||||||
note over NodeA,NodeB: Every node tick every 20s (SendHeartbeat)
|
note over NodeA,NodeB: SendHeartbeat goroutine — tick every 20s
|
||||||
|
|
||||||
par Node A heartbeat
|
== Tick Node A ==
|
||||||
NodeA -> Indexer: NewStream /opencloud/heartbeat/1.0
|
|
||||||
NodeA -> Indexer: stream.Encode(Heartbeat{Name, PeerID_A, IndexersBinded, Record})
|
|
||||||
|
|
||||||
Indexer -> Indexer: CheckHeartbeat(host, stream, dec, streams, mu, maxNodes)
|
NodeA -> Indexer: NewStream /opencloud/heartbeat/1.0\n(long-lived, réutilisé aux ticks suivants)
|
||||||
note over Indexer: len(h.Network().Peers()) >= maxNodes → reject
|
NodeA -> Indexer: stream.Encode(Heartbeat{\n name, PeerID_A, timestamp,\n indexersBinded: [addr1, addr2],\n need: maxPool - len(pool),\n challenges: [PeerID_A, PeerID_B], ← batch (tous les 1-10 HBs)\n challengeDID: "uuid-did-A", ← DHT challenge (tous les 5 batches)\n record: SignedPeerRecord_A ← expiry=now+2min\n})
|
||||||
|
|
||||||
Indexer -> Indexer: getBandwidthChallengeRate(host, remotePeer, 512-2048B)
|
Indexer -> Indexer: CheckHeartbeat(stream, maxNodes)\n→ len(Peers()) >= maxNodes → reject
|
||||||
|
|
||||||
Indexer -> Indexer: getOwnDiversityRate(host)\\nh.Network().Peers() + Peerstore.Addrs()\\n→ ratio /24 subnets distincts
|
Indexer -> Indexer: HandleHeartbeat → UptimeTracker.RecordHeartbeat()\n→ gap ≤ 2×interval : TotalOnline += gap
|
||||||
|
|
||||||
Indexer -> Indexer: fillRate = len(h.Network().Peers()) / maxNodes
|
Indexer -> Indexer: Republish PeerRecord A to DHT\nDHT.PutValue("/node/"+DID_A, record_A)
|
||||||
|
|
||||||
Indexer -> Indexer: Retrieve existing UptimeTracker\\noldTracker.RecordHeartbeat()\\n→ TotalOnline += gap si gap ≤ 120s\\nuptimeRatio = TotalOnline / time.Since(FirstSeen)
|
== Réponse indexeur → node A ==
|
||||||
|
|
||||||
Indexer -> Indexer: ComputeIndexerScore(\\n uptimeRatio, bpms, diversity,\\n latencyScore, fillRate\\n)\\nScore = (0.20×U + 0.20×B + 0.20×D + 0.15×L + 0.25×F) × 100
|
Indexer -> Indexer: BuildHeartbeatResponse(remotePeer=A, need, challenges, challengeDID)\n\nfillRate = connected_nodes / MaxNodesConn()\npeerCount = connected_nodes\nmaxNodes = MaxNodesConn()\nbornAt = time of indexer startup\n\nChallenges: pour chaque PeerID challengé\n found = PeerID dans StreamRecords[ProtocolHeartbeat]?\n lastSeen = HeartbeatStream.UptimeTracker.LastSeen\n\nDHT challenge:\n DHT.GetValue("/node/"+challengeDID, timeout=3s)\n → dhtFound + dhtPayload\n\nWitnesses: jusqu'à 3 AddrInfos de nœuds connectés\n (adresses connues dans Peerstore)\n\nSuggestions: jusqu'à `need` indexeurs depuis dhtCache\n (refresh asynchrone 2min, SelectByFillRate)\n\nSuggestMigrate: fillRate > 80%\n ET node dans offload.inBatch (batch ≤ 5, grace 3×HB)
|
||||||
|
|
||||||
Indexer -> Indexer: dynamicMinScore(age)\\n= 20 + 60×(hours/24), max 80
|
Indexer --> NodeA: stream.Encode(HeartbeatResponse{\n fillRate, peerCount, maxNodes, bornAt,\n challenges, dhtFound, dhtPayload,\n witnesses, suggestions, suggestMigrate\n})
|
||||||
|
|
||||||
alt Score A < dynamicMinScore(age)
|
== Traitement score côté Node A ==
|
||||||
Indexer -> NodeA: (close stream — "not enough trusting value")
|
|
||||||
else Score A >= dynamicMinScore(age)
|
NodeA -> NodeA: score = ensureScore(Indexers, addr_indexer)\nscore.UptimeTracker.RecordHeartbeat()\n\nlatencyScore = max(0, 1 - RTT / (BaseRoundTrip × 10))\n\nBornAt stability:\n bornAt changed? → score.bornAtChanges++\n\nfillConsistency:\n expected = peerCount / maxNodes\n |expected - fillRate| < 10% → fillConsistent++\n\nChallenge PeerID (ground truth own PeerID):\n found=true AND lastSeen < 2×interval → challengeCorrect++\n\nDHT challenge:\n dhtFound=true → dhtSuccess++\n\nWitness query (async):\n go queryWitnesses(h, indexerID, bornAt, fillRate, witnesses, score)
|
||||||
Indexer -> Indexer: streams[PeerID_A].HeartbeatStream = hb.Stream\\nstreams[PeerID_A].HeartbeatStream.UptimeTracker = oldTracker\\nstreams[PeerID_A].LastScore = hb.Score
|
|
||||||
note over Indexer: AfterHeartbeat → republish PeerRecord on DHT
|
NodeA -> NodeA: score.Score = ComputeNodeSideScore(latencyScore)\n\nScore = (\n 0.20 × uptimeRatio\n+ 0.20 × challengeAccuracy\n+ 0.15 × latencyScore\n+ 0.10 × fillScore ← 1 - fillRate\n+ 0.10 × fillConsistency\n+ 0.15 × witnessConsistency\n+ 0.10 × dhtSuccessRate\n) × 100 × bornAtPenalty\n\nbornAtPenalty = max(0, 1 - 0.30 × bornAtChanges)\nminScore = clamp(20 + 60 × (age.Hours/24), 20, 80)
|
||||||
|
|
||||||
|
alt score < minScore\n AND TotalOnline ≥ 2×interval\n AND !IsSeed\n AND len(pool) > 1
|
||||||
|
NodeA -> NodeA: evictPeer(dir, addr, id, proto)\n→ delete Addr + Score + Stream\ngo TriggerConsensus(h, voters, need)\n ou replenishIndexersFromDHT(h, need)
|
||||||
end
|
end
|
||||||
|
|
||||||
else Node B heartbeat
|
alt resp.SuggestMigrate == true AND nonSeedCount >= MinIndexer
|
||||||
NodeB -> Indexer: NewStream /opencloud/heartbeat/1.0
|
alt IsSeed
|
||||||
NodeB -> Indexer: stream.Encode(Heartbeat{Name, PeerID_B, IndexersBinded, Record})
|
NodeA -> NodeA: score.IsSeed = false\n(de-stickied — score eviction maintenant possible)
|
||||||
|
else !IsSeed
|
||||||
Indexer -> Indexer: CheckHeartbeat → getBandwidthChallengeRate\\n→ getOwnDiversityRate → ComputeIndexerScore(5 composants)
|
NodeA -> NodeA: evictPeer → migration acceptée
|
||||||
|
end
|
||||||
alt Score B >= dynamicMinScore(age)
|
|
||||||
Indexer -> Indexer: streams[PeerID_B] subscribed + LastScore updated
|
|
||||||
end
|
end
|
||||||
end par
|
|
||||||
|
|
||||||
note over Indexer: GC ticker 30s — gc()\\nnow.After(Expiry) où Expiry = lastHBTime + 2min\\n→ AfterDelete(pid, name, did)
|
alt len(resp.Suggestions) > 0
|
||||||
|
NodeA -> NodeA: handleSuggestions(dir, indexerID, suggestions)\n→ inconnus ajoutés à Indexers Directory\n→ NudgeIt() si ajout effectif
|
||||||
|
end
|
||||||
|
|
||||||
|
== Tick Node B (concurrent) ==
|
||||||
|
|
||||||
|
NodeB -> Indexer: stream.Encode(Heartbeat{PeerID_B, ...})
|
||||||
|
Indexer -> Indexer: CheckHeartbeat → UptimeTracker → BuildHeartbeatResponse
|
||||||
|
Indexer --> NodeB: HeartbeatResponse{...}
|
||||||
|
|
||||||
|
== GC côté Indexeur ==
|
||||||
|
|
||||||
|
note over Indexer: GC ticker 30s — gc()\nnow.After(Expiry) où Expiry = lastHBTime + 2min\n→ AfterDelete(pid, name, did) hors lock\n→ publishNameEvent(NameIndexDelete, ...)\nFillRate recalculé automatiquement
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
|
|||||||
@@ -1,42 +1,38 @@
|
|||||||
@startuml
|
@startuml
|
||||||
title NATS — CREATE_RESOURCE : Peer A Create/Update Peer B & establishing stream
|
title NATS — CREATE_RESOURCE : Peer A crée/met à jour Peer B (connexion on-demand)
|
||||||
|
|
||||||
participant "App Peer A (oc-api)" as AppA
|
participant "App Peer A (oc-api)" as AppA
|
||||||
participant "NATS A" as NATSA
|
participant "NATS A" as NATSA
|
||||||
participant "Node A" as NodeA
|
participant "Node A" as NodeA
|
||||||
participant "StreamService A" as StreamA
|
participant "StreamService A" as StreamA
|
||||||
participant "Node B" as NodeB
|
participant "Node B" as NodeB
|
||||||
participant "StreamService B" as StreamB
|
|
||||||
participant "DB Peer A (oc-lib)" as DBA
|
participant "DB Peer A (oc-lib)" as DBA
|
||||||
|
|
||||||
note over AppA: Peer B is discovered\n(per indexer or manually)
|
note over AppA: Peer B est découvert\n(via indexeur ou manuellement)
|
||||||
|
|
||||||
AppA -> NATSA: Publish(CREATE_RESOURCE, {\n FromApp:"oc-api",\n Datatype:PEER,\n Payload: Peer B {StreamAddress_B, Relation:PARTNER}\n})
|
AppA -> NATSA: Publish(CREATE_RESOURCE, {\n FromApp:"oc-api",\n Datatype:PEER,\n Payload: Peer B {StreamAddress_B, Relation:PARTNER}\n})
|
||||||
|
|
||||||
NATSA -> NodeA: ListenNATS callback → CREATE_RESOURCE
|
NATSA -> NodeA: ListenNATS callback → CREATE_RESOURCE
|
||||||
|
|
||||||
NodeA -> NodeA: if from himself ? → No, continue
|
|
||||||
NodeA -> NodeA: json.Unmarshal(payload) → peer.Peer B
|
NodeA -> NodeA: json.Unmarshal(payload) → peer.Peer B
|
||||||
|
NodeA -> NodeA: if peer == self ? → skip
|
||||||
|
|
||||||
alt peer B.Relation == PARTNER
|
alt peer B.Relation == PARTNER
|
||||||
NodeA -> StreamA: ConnectToPartner(B.StreamAddress)
|
NodeA -> StreamA: ToPartnerPublishEvent(ctx, PB_CREATE, PEER, payload)
|
||||||
StreamA -> NodeB: Connect (libp2p)
|
note over StreamA: Pas de heartbeat permanent.\nConnexion on-demand : ouvre un stream,\nenvoie l'événement, ferme ou laisse expirer.
|
||||||
StreamA -> NodeB: NewStream /opencloud/resource/heartbeat/partner/1.0
|
StreamA -> StreamA: PublishCommon(PEER, user, B.PeerID,\n ProtocolUpdateResource, selfPeerJSON)
|
||||||
StreamA -> NodeB: json.Encode(Heartbeat{Name_A, DID_A, PeerID_A})
|
StreamA -> NodeB: TempStream /opencloud/resource/update/1.0\n(TTL court, fermé après envoi)
|
||||||
|
StreamA -> NodeB: stream.Encode(Event{from, datatype, payload})
|
||||||
|
NodeB --> StreamA: (traitement applicatif)
|
||||||
|
|
||||||
NodeB -> StreamB: HandlePartnerHeartbeat(stream)
|
else peer B.Relation != PARTNER (révocation / blacklist)
|
||||||
StreamB -> StreamB: CheckHeartbeat → bandwidth challenge
|
note over NodeA: Ferme tous les streams existants vers Peer B
|
||||||
StreamB -> StreamB: streams[ProtocolHeartbeatPartner][PeerID_A] = {DID_A, Expiry=now+10s}
|
loop Pour chaque stream actif vers PeerID_B
|
||||||
|
|
||||||
StreamA -> StreamA: streams[ProtocolHeartbeatPartner][PeerID_B] = {DID_B, Expiry=now+10s}
|
|
||||||
note over StreamA,StreamB: Stream partner long-lived établi\nbi-directionnal
|
|
||||||
else peer B.Relation != PARTNER (revoke / blacklist)
|
|
||||||
note over NodeA: Suppress all streams onto Peer B
|
|
||||||
loop For every Streams
|
|
||||||
NodeA -> StreamA: streams[proto][PeerID_B].Stream.Close()
|
NodeA -> StreamA: streams[proto][PeerID_B].Stream.Close()
|
||||||
NodeA -> StreamA: delete(streams[proto], PeerID_B)
|
NodeA -> StreamA: delete(streams[proto], PeerID_B)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
NodeA -> DBA: (no write — only app source manually add peer)
|
|
||||||
|
NodeA -> DBA: (pas d'écriture directe — seule l'app source gère la DB)
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
|
|||||||
@@ -1,42 +1,35 @@
|
|||||||
@startuml 25_failure_node_gc
|
@startuml 25_failure_node_gc
|
||||||
title F7 — Crash nœud → GC indexeur + AfterDelete
|
title F7 — Crash nœud → GC indexeur + AfterDelete
|
||||||
|
|
||||||
participant "Node\\n(crashé)" as N
|
participant "Node\n(crashé)" as N
|
||||||
participant "Indexer A" as IA
|
participant "Indexer A" as IA
|
||||||
participant "Indexer B" as IB
|
participant "Indexer B" as IB
|
||||||
participant "Native A" as NA
|
|
||||||
|
|
||||||
note over N, NA: État nominal : N heartbeatait vers IA et IB
|
note over N, IB: État nominal : N heartbeatait vers IA et IB
|
||||||
|
|
||||||
== Crash Node ==
|
== Crash Node ==
|
||||||
N ->x IA: stream reset (heartbeat coupé)
|
N ->x IA: stream reset (heartbeat coupé)
|
||||||
N ->x IB: stream reset (heartbeat coupé)
|
N ->x IB: stream reset (heartbeat coupé)
|
||||||
|
|
||||||
== GC côté Indexer A ==
|
== GC côté Indexer A ==
|
||||||
note over IA: HandleHeartbeat : stream reset détecté\\nStreamRecords[ProtocolHB][N].LastSeen figé
|
note over IA: HandleHeartbeat : stream reset détecté\nStreamRecords[ProtocolHeartbeat][N].Expiry figé
|
||||||
|
|
||||||
loop ticker GC (30s) — StartGC(30*time.Second)
|
loop ticker GC (30s) — StartGC(30*time.Second)
|
||||||
IA -> IA: gc()\\nnow.After(Expiry) où Expiry = lastHBTime + 2min\\n→ si 2min sans heartbeat → éviction
|
IA -> IA: gc()\nnow.After(Expiry) où Expiry = lastHBTime + 2min\n→ si 2min sans heartbeat → éviction
|
||||||
IA -> IA: delete(StreamRecords[ProtocolHB][N])\\nAfterDelete(N, name, did) appelé hors lock
|
IA -> IA: delete(StreamRecords[ProtocolHeartbeat][N])\nAfterDelete(N, name, did) appelé hors lock
|
||||||
note over IA: N retiré du registre vivant.\\nFillRate recalculé (n-1 / maxNodes).
|
note over IA: N retiré du registre vivant.\nFillRate recalculé : (n-1) / MaxNodesConn()
|
||||||
end
|
end
|
||||||
|
|
||||||
== Impact sur le scoring / fill rate ==
|
== Impact fill rate ==
|
||||||
note over IA: FillRate diminue\\nProchain subscribe vers NA inclura FillRate mis à jour
|
note over IA: FillRate diminue.\nProchain BuildHeartbeatResponse\ninclura FillRate mis à jour.\nSi fillRate revient < 80% :\n→ offload.inBatch et alreadyTried réinitialisés.
|
||||||
|
|
||||||
IA -> NA: /opencloud/native/subscribe/1.0\\nIndexerRegistration{FillRate: 0.3} /' était 0.5 '/
|
== GC côté Indexer B ==
|
||||||
|
note over IB: Même GC effectué.\nN retiré de StreamRecords[ProtocolHeartbeat].
|
||||||
NA -> NA: liveIndexerEntry[IA].FillRate = 0.3\\nPriorité de routage recalculée : w(0.3) = 0.21
|
|
||||||
|
|
||||||
== Impact sur la Phase 2 (indexerLivenessVote) ==
|
|
||||||
note over IA: Si un autre nœud demande consensus,\\nN n'est plus dans StreamRecords.\\nN absent de la réponse Alive[].
|
|
||||||
|
|
||||||
note over IB: Même GC effectué côté IB.\\nN retiré de StreamRecords[ProtocolHB].
|
|
||||||
|
|
||||||
== Reconnexion éventuelle du nœud ==
|
== Reconnexion éventuelle du nœud ==
|
||||||
N -> N: redémarrage
|
N -> N: redémarrage
|
||||||
N -> IA: SendHeartbeat /opencloud/heartbeat/1.0\\nHeartbeat{Score: X, IndexersBinded: 2}
|
N -> IA: SendHeartbeat /opencloud/heartbeat/1.0\nHeartbeat{name, PeerID_N, IndexersBinded, need, record}
|
||||||
IA -> IA: HandleHeartbeat → nouveau UptimeTracker(FirstSeen=now)\\nStreamRecords[ProtocolHB][N] recréé
|
IA -> IA: HandleHeartbeat → UptimeTracker(FirstSeen=now)\nStreamRecords[ProtocolHeartbeat][N] recréé\nRepublish PeerRecord N dans DHT
|
||||||
note over IA: N de retour avec FirstSeen frais.\\ndynamicMinScore élevé tant que age < 24h.
|
note over IA: N de retour avec FirstSeen frais.\ndynamicMinScore élevé tant que age < 24h.\n(phase de grâce : 2 ticks avant scoring)
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
|
|||||||
@@ -3,73 +3,86 @@
|
|||||||
Tous les fichiers sont au format [PlantUML](https://plantuml.com/).
|
Tous les fichiers sont au format [PlantUML](https://plantuml.com/).
|
||||||
Rendu possible via VS Code (extension PlantUML), IntelliJ, ou [plantuml.com/plantuml](https://www.plantuml.com/plantuml/uml/).
|
Rendu possible via VS Code (extension PlantUML), IntelliJ, ou [plantuml.com/plantuml](https://www.plantuml.com/plantuml/uml/).
|
||||||
|
|
||||||
## Diagrammes de séquence (flux internes)
|
> **Note :** Les diagrammes 06, 07, 12, 14–24 et plusieurs protocoles ci-dessous
|
||||||
|
> concernaient l'architecture à 3 niveaux (node → indexer → native indexer),
|
||||||
|
> supprimée dans la branche `feature/no_native_consortium`. Ces fichiers sont
|
||||||
|
> conservés à titre historique. Les diagrammes actifs sont indiqués ci-dessous.
|
||||||
|
|
||||||
|
## Diagrammes actifs (architecture 2 niveaux)
|
||||||
|
|
||||||
|
### Séquences principales
|
||||||
|
|
||||||
| Fichier | Description |
|
| Fichier | Description |
|
||||||
|---------|-------------|
|
|---------|-------------|
|
||||||
| `01_node_init.puml` | Initialisation complète d'un Node (libp2p host, GossipSub, indexers, StreamService, PubSubService, NATS) |
|
| `01_node_init.puml` | Initialisation d'un Node : libp2p host + PSK + ConnectionGater + ConnectToIndexers + SendHeartbeat + DHT proactive |
|
||||||
| `02_node_claim.puml` | Enregistrement du nœud auprès des indexeurs (`claimInfo` + `publishPeerRecord`) |
|
| `02_node_claim.puml` | Enregistrement du nœud : `claimInfo` + `publishPeerRecord` → indexeurs → DHT |
|
||||||
| `03_indexer_heartbeat.puml` | Protocole heartbeat avec score 5 composants (U/B/D/L/F), UptimeTracker, dynamicMinScore |
|
| `03_indexer_heartbeat.puml` | Protocole heartbeat bidirectionnel : challenges PeerID + DHT + witness, scoring 7 dimensions, suggestions, SuggestMigrate |
|
||||||
| `04_indexer_publish.puml` | Publication d'un `PeerRecord` vers l'indexeur → DHT |
|
| `04_indexer_publish.puml` | Publication d'un `PeerRecord` vers l'indexeur → DHT (PutValue /node, /name, /pid) |
|
||||||
| `05_indexer_get.puml` | Résolution d'un pair via l'indexeur (`GetPeerRecord` + `handleNodeGet` + DHT) |
|
| `05_indexer_get.puml` | Résolution d'un pair : `GetPeerRecord` → indexeur → DHT si absent local |
|
||||||
| `06_native_registration.puml` | Enregistrement d'un indexeur auprès du Native (FillRate, signature, TTL 90s, unsubscribe) |
|
| `08_nats_create_resource.puml` | Handler NATS `CREATE_RESOURCE` : propagation partenaires on-demand |
|
||||||
| `07_native_get_consensus.puml` | `ConnectToNatives` : fetch pool + Phase 1 (clientSideConsensus) + Phase 2 (indexerLivenessVote) |
|
|
||||||
| `08_nats_create_resource.puml` | Handler NATS `CREATE_RESOURCE` : connexion/déconnexion d'un partner |
|
|
||||||
| `09_nats_propagation.puml` | Handler NATS `PROPALGATION_EVENT` : delete, considers, planner, search |
|
| `09_nats_propagation.puml` | Handler NATS `PROPALGATION_EVENT` : delete, considers, planner, search |
|
||||||
| `10_pubsub_search.puml` | Recherche gossip globale (type `"all"`) via GossipSub |
|
| `10_pubsub_search.puml` | Recherche gossip globale (GossipSub /opencloud/search/1.0) |
|
||||||
| `11_stream_search.puml` | Recherche directe par stream (type `"known"` ou `"partner"`) |
|
| `11_stream_search.puml` | Recherche directe par stream (type `"known"` ou `"partner"`) |
|
||||||
| `12_partner_heartbeat.puml` | Heartbeat partner + propagation CRUD vers les partenaires |
|
|
||||||
| `13_planner_flow.puml` | Session planner (ouverture, échange, fermeture) |
|
| `13_planner_flow.puml` | Session planner (ouverture, échange, fermeture) |
|
||||||
| `14_native_offload_gc.puml` | Boucles background du Native Indexer (offload, DHT refresh, GC) |
|
|
||||||
|
|
||||||
## Diagrammes de topologie et flux de panne
|
### Résilience et pool management
|
||||||
|
|
||||||
### Configurations réseau
|
|
||||||
|
|
||||||
| Fichier | Description |
|
| Fichier | Description |
|
||||||
|---------|-------------|
|
|---------|-------------|
|
||||||
| `15_archi_config_nominale.puml` | C1 — Topologie nominale : 2 natifs · 2 indexeurs · 2 nœuds, tous flux |
|
| `hb_failure_evict.puml` | HeartbeatFailure → evictPeer → TriggerConsensus ou DHT replenish |
|
||||||
| `16_archi_config_seed.puml` | C2 — Mode seed sans natif : indexeurs à AdmittedAt=0, risque D20 actif |
|
| `hb_last_indexer.puml` | Protection last-indexer → reconnectToSeeds → retryUntilSeedResponds |
|
||||||
|
| `dht_discovery.puml` | Découverte proactive DHT : Provide/FindProviders, SelectByFillRate, dhtCache |
|
||||||
|
| `connection_gater.puml` | ConnectionGater : DB blacklist → DHT sequential check (transport-error fallthrough) |
|
||||||
|
|
||||||
### Flux de démarrage
|
## Diagrammes historiques (architecture 3 niveaux — obsolètes)
|
||||||
|
|
||||||
|
Ces fichiers documentent l'ancienne architecture. Ils ne correspondent plus
|
||||||
|
au code en production.
|
||||||
|
|
||||||
| Fichier | Description |
|
| Fichier | Description |
|
||||||
|---------|-------------|
|
|---------|-------------|
|
||||||
| `17_startup_consensus_phase1_phase2.puml` | Démarrage nominal : Phase 1 (admission native) + Phase 2 (liveness vote) |
|
| `06_native_registration.puml` | Enregistrement d'un indexeur auprès du Native (supprimé) |
|
||||||
| `18_startup_seed_discovers_native.puml` | Upgrade seed → nominal : goroutine async découvre un natif via l'indexeur |
|
| `07_native_get_consensus.puml` | `ConnectToNatives` : fetch pool + Phase 1 + Phase 2 (supprimé) |
|
||||||
|
| `12_partner_heartbeat.puml` | Heartbeat partner permanent (supprimé — connexions on-demand) |
|
||||||
|
| `14_native_offload_gc.puml` | Boucles background Native Indexer (supprimé) |
|
||||||
|
| `15_archi_config_nominale.puml` | Topologie nominale avec natifs (obsolète) |
|
||||||
|
| `16_archi_config_seed.puml` | Mode seed sans natif (obsolète) |
|
||||||
|
| `17_startup_consensus_phase1_phase2.puml` | Démarrage avec consensus natifs (supprimé) |
|
||||||
|
| `18_startup_seed_discovers_native.puml` | Upgrade seed → native (supprimé) |
|
||||||
|
| `19_failure_indexer_crash.puml` | F1 — replenish depuis natif (supprimé) |
|
||||||
|
| `20_failure_both_indexers_selfdelegate.puml` | F2 — IsSelfFallback native (supprimé) |
|
||||||
|
| `21_failure_native_one_down.puml` | F3 — panne 1 natif (supprimé) |
|
||||||
|
| `22_failure_both_natives.puml` | F4 — panne 2 natifs (supprimé) |
|
||||||
|
| `23_failure_native_plus_indexer.puml` | F5 — panne combinée natif + indexeur (supprimé) |
|
||||||
|
| `24_failure_retry_lost_native.puml` | F6 — retryLostNative (supprimé) |
|
||||||
|
| `25_failure_node_gc.puml` | F7 — GC nœud côté indexeur (toujours valide) |
|
||||||
|
|
||||||
### Flux de panne
|
## Protocoles libp2p actifs
|
||||||
|
|
||||||
| Fichier | Code | Description |
|
|
||||||
|---------|------|-------------|
|
|
||||||
| `19_failure_indexer_crash.puml` | F1 | Panne 1 indexeur → replenish depuis natif → IC admis |
|
|
||||||
| `20_failure_both_indexers_selfdelegate.puml` | F2 | Panne 2 indexeurs → natif `IsSelfFallback=true`, runOffloadLoop |
|
|
||||||
| `21_failure_native_one_down.puml` | F3 | Panne 1 natif → quorum 1/1 suffisant, mode dégradé |
|
|
||||||
| `22_failure_both_natives.puml` | F4 | Panne 2 natifs → fallback pool pré-validé, retryLostNative |
|
|
||||||
| `23_failure_native_plus_indexer.puml` | F5 | Panne combinée : 1 natif + 1 indexeur → double replenish |
|
|
||||||
| `24_failure_retry_lost_native.puml` | F6 | Panne réseau transitoire → retryLostNative (30s ticker) |
|
|
||||||
| `25_failure_node_gc.puml` | F7 | Crash nœud → GC indexeur (120s), AfterDelete, fill rate recalculé |
|
|
||||||
|
|
||||||
## Protocoles libp2p utilisés (référence complète)
|
|
||||||
|
|
||||||
| Protocole | Description |
|
| Protocole | Description |
|
||||||
|-----------|-------------|
|
|-----------|-------------|
|
||||||
| `/opencloud/heartbeat/1.0` | Heartbeat universel : node→indexeur, indexeur→native, native→native (long-lived) |
|
| `/opencloud/heartbeat/1.0` | Heartbeat bidirectionnel node→indexeur (long-lived) |
|
||||||
| `/opencloud/probe/1.0` | Sonde de bande passante (echo, mesure latence + débit) |
|
| `/opencloud/probe/1.0` | Sonde de bande passante (echo, mesure latence + débit) |
|
||||||
| `/opencloud/resource/heartbeat/partner/1.0` | Heartbeat node ↔ partner (long-lived) |
|
| `/opencloud/witness/1.0` | Requête témoin : "quel est ton score de l'indexeur X ?" |
|
||||||
| `/opencloud/record/publish/1.0` | Publication `PeerRecord` vers indexeur |
|
| `/opencloud/record/publish/1.0` | Publication `PeerRecord` vers indexeur |
|
||||||
| `/opencloud/record/get/1.0` | Requête `GetPeerRecord` vers indexeur |
|
| `/opencloud/record/get/1.0` | Requête `GetPeerRecord` vers indexeur |
|
||||||
| `/opencloud/native/subscribe/1.0` | Enregistrement indexeur auprès du native (+ FillRate) |
|
|
||||||
| `/opencloud/native/unsubscribe/1.0` | Désenregistrement explicite indexeur → native |
|
|
||||||
| `/opencloud/native/indexers/1.0` | Requête de pool d'indexeurs au native (tri par w(F)=F×(1-F)) |
|
|
||||||
| `/opencloud/native/consensus/1.0` | Phase 1 : validation de pool d'indexeurs (vote majoritaire natifs) |
|
|
||||||
| `/opencloud/native/peers/1.0` | Demande de pairs natifs connus (replenish mesh natif) |
|
|
||||||
| `/opencloud/indexer/natives/1.0` | Demande d'adresses de natifs connus par un indexeur |
|
|
||||||
| `/opencloud/indexer/consensus/1.0` | Phase 2 : liveness vote (LastSeen ≤ 120s && LastScore ≥ 30) |
|
|
||||||
| `/opencloud/resource/search/1.0` | Recherche de ressources entre peers |
|
| `/opencloud/resource/search/1.0` | Recherche de ressources entre peers |
|
||||||
| `/opencloud/resource/create/1.0` | Propagation création ressource vers partner |
|
| `/opencloud/resource/create/1.0` | Propagation création ressource → partner |
|
||||||
| `/opencloud/resource/update/1.0` | Propagation mise à jour ressource vers partner |
|
| `/opencloud/resource/update/1.0` | Propagation mise à jour ressource → partner |
|
||||||
| `/opencloud/resource/delete/1.0` | Propagation suppression ressource vers partner |
|
| `/opencloud/resource/delete/1.0` | Propagation suppression ressource → partner |
|
||||||
| `/opencloud/resource/planner/1.0` | Session planner (booking) |
|
| `/opencloud/resource/planner/1.0` | Session planner (booking) |
|
||||||
| `/opencloud/resource/verify/1.0` | Vérification signature ressource |
|
| `/opencloud/resource/verify/1.0` | Vérification signature ressource |
|
||||||
| `/opencloud/resource/considers/1.0` | Transmission d'un "considers" d'exécution |
|
| `/opencloud/resource/considers/1.0` | Transmission d'un considers d'exécution |
|
||||||
|
|
||||||
|
## Protocoles supprimés (architecture native)
|
||||||
|
|
||||||
|
| Protocole | Raison |
|
||||||
|
|-----------|--------|
|
||||||
|
| `/opencloud/native/subscribe/1.0` | Tier native supprimé |
|
||||||
|
| `/opencloud/native/unsubscribe/1.0` | Tier native supprimé |
|
||||||
|
| `/opencloud/native/indexers/1.0` | Remplacé par DHT FindProviders |
|
||||||
|
| `/opencloud/native/consensus/1.0` | Remplacé par TriggerConsensus léger |
|
||||||
|
| `/opencloud/native/peers/1.0` | Tier native supprimé |
|
||||||
|
| `/opencloud/indexer/natives/1.0` | Tier native supprimé |
|
||||||
|
| `/opencloud/indexer/consensus/1.0` | Remplacé par TriggerConsensus |
|
||||||
|
| `/opencloud/resource/heartbeat/partner/1.0` | Heartbeat partner supprimé — on-demand |
|
||||||
|
|||||||
69
docs/diagrams/connection_gater.puml
Normal file
69
docs/diagrams/connection_gater.puml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
@startuml connection_gater
|
||||||
|
title ConnectionGater — Vérification à l'admission (InterceptSecured)
|
||||||
|
|
||||||
|
participant "Remote Peer\n(inbound)" as Remote
|
||||||
|
participant "libp2p\nhost A" as Host
|
||||||
|
participant "OCConnectionGater" as Gater
|
||||||
|
participant "DB (oc-lib)" as DB
|
||||||
|
participant "Indexer X\n(joignable)" as IX
|
||||||
|
participant "Indexer Y\n(injoignable)" as IY
|
||||||
|
|
||||||
|
Remote -> Host: inbound connection (post-PSK, post-TLS)
|
||||||
|
Host -> Gater: InterceptSecured(dir=Inbound, id=RemotePeerID, conn)
|
||||||
|
|
||||||
|
alt dir == Outbound
|
||||||
|
Gater --> Host: true (outbound toujours autorisé)
|
||||||
|
end
|
||||||
|
|
||||||
|
== Étape 1 : Vérification base de données ==
|
||||||
|
|
||||||
|
Gater -> DB: NewRequestAdmin(PEER).Search(\n Filter: peer_id = RemotePeerID\n)
|
||||||
|
DB --> Gater: []peer.Peer
|
||||||
|
|
||||||
|
alt trouvé AND relation == BLACKLIST
|
||||||
|
Gater --> Host: false (refusé — blacklisté)
|
||||||
|
Host ->x Remote: connexion fermée
|
||||||
|
end
|
||||||
|
|
||||||
|
alt trouvé AND relation != BLACKLIST
|
||||||
|
Gater --> Host: true (connu et non blacklisté)
|
||||||
|
end
|
||||||
|
|
||||||
|
== Étape 2 : Vérification DHT (peer inconnu en DB) ==
|
||||||
|
|
||||||
|
note over Gater: Peer inconnu → vérifier qu'il existe\ndans le réseau DHT
|
||||||
|
|
||||||
|
Gater -> Gater: getReq = GetValue{PeerID: RemotePeerID}
|
||||||
|
|
||||||
|
loop Pour chaque indexeur (ordre aléatoire — Shuffle)
|
||||||
|
|
||||||
|
alt Indexer IY injoignable (transport error)
|
||||||
|
Gater -> IY: h.Connect(ctxTTL, IY_AddrInfo)
|
||||||
|
IY -->x Gater: connexion échouée
|
||||||
|
note over Gater: reachable=false\n→ essaie le suivant
|
||||||
|
end
|
||||||
|
|
||||||
|
alt Indexer IX joignable
|
||||||
|
Gater -> IX: h.Connect(ctxTTL, IX_AddrInfo)
|
||||||
|
IX --> Gater: OK
|
||||||
|
Gater -> IX: TempStream /opencloud/record/get/1.0
|
||||||
|
Gater -> IX: stream.Encode(GetValue{PeerID: RemotePeerID})
|
||||||
|
IX -> IX: Recherche locale + DHT si absent
|
||||||
|
IX --> Gater: GetResponse{Found: true/false, Records}
|
||||||
|
note over Gater: reachable=true → réponse autoritaire\n(DHT distribué : un seul indexeur suffit)
|
||||||
|
|
||||||
|
alt Found == true
|
||||||
|
Gater --> Host: true (pair connu du réseau)
|
||||||
|
else Found == false
|
||||||
|
Gater --> Host: false (refusé — inconnu du réseau)
|
||||||
|
Host ->x Remote: connexion fermée
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
alt Aucun indexeur joignable
|
||||||
|
note over Gater: Réseau naissant ou tous isolés.\nAutorisation par défaut.
|
||||||
|
Gater --> Host: true
|
||||||
|
end
|
||||||
|
|
||||||
|
@enduml
|
||||||
56
docs/diagrams/dht_discovery.puml
Normal file
56
docs/diagrams/dht_discovery.puml
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
@startuml dht_discovery
|
||||||
|
title Découverte DHT : Provide/FindProviders + SelectByFillRate + dhtCache indexeur
|
||||||
|
|
||||||
|
participant "Indexer A\n(nouveau)" as IA
|
||||||
|
participant "DHT Network" as DHT
|
||||||
|
participant "Node B\n(bootstrap)" as NodeB
|
||||||
|
participant "Indexer A\n(existant)" as IAexist
|
||||||
|
|
||||||
|
== Inscription indexeur dans la DHT ==
|
||||||
|
|
||||||
|
note over IA: Démarrage IndexerService\nstartDHTProvide(fillRateFn)
|
||||||
|
|
||||||
|
IA -> IA: Attend adresse routable (max 60s)\nnon-loopback disponible
|
||||||
|
|
||||||
|
IA -> DHT: DHT.Bootstrap(ctx)\n→ routing table warmup
|
||||||
|
|
||||||
|
loop ticker RecommendedHeartbeatInterval (~20s)
|
||||||
|
IA -> DHT: DHT.Provide(IndexerCID, true)\n← IndexerCID = CID(sha256("/opencloud/indexers"))
|
||||||
|
note over DHT: L'indexeur est annoncé comme provider.\nTTL géré par libp2p-kad-dht.\nAuto-expire si Provide() s'arrête.
|
||||||
|
end
|
||||||
|
|
||||||
|
== Cache DHT passif de l'indexeur ==
|
||||||
|
|
||||||
|
note over IA: startDHTCacheRefresh()\ngoroutine arrière-plan
|
||||||
|
|
||||||
|
IA -> IA: Initial delay 30s (routing table warmup)
|
||||||
|
|
||||||
|
loop ticker 2min
|
||||||
|
IA -> DHT: DiscoverIndexersFromDHT(h, dht, 30)\n← FindProviders(IndexerCID, max=30)
|
||||||
|
DHT --> IA: []AddrInfo (jusqu'à 30 candidats)
|
||||||
|
IA -> IA: Filtre self\nSelectByFillRate(filtered, nil, 10)\n→ diversité /24, prior f=0.5 (fill rates inconnus)
|
||||||
|
IA -> IA: dhtCache = selected (max 10)\n→ utilisé pour Suggestions dans BuildHeartbeatResponse
|
||||||
|
end
|
||||||
|
|
||||||
|
== Découverte côté Node au bootstrap ==
|
||||||
|
|
||||||
|
NodeB -> NodeB: ConnectToIndexers → seeds ajoutés\nSendHeartbeat démarré
|
||||||
|
|
||||||
|
NodeB -> NodeB: goroutine proactive (après 5s warmup)
|
||||||
|
|
||||||
|
alt discoveryDHT == nil (node pur, pas d'IndexerService)
|
||||||
|
NodeB -> DHT: initNodeDHT(h, seeds)\n← DHT client mode, bootstrappé sur seeds
|
||||||
|
end
|
||||||
|
|
||||||
|
NodeB -> DHT: DiscoverIndexersFromDHT(h, discoveryDHT, need+extra)
|
||||||
|
DHT --> NodeB: []AddrInfo candidats
|
||||||
|
|
||||||
|
NodeB -> NodeB: Filtre self\nSelectByFillRate(candidates, fillRates, need)\n→ pondération w(F) = F×(1-F)\n F=0.2 → w=0.16 (très probable)\n F=0.5 → w=0.25 (max)\n F=0.8 → w=0.16 (peu probable)\n→ filtre diversité /24
|
||||||
|
|
||||||
|
loop Pour chaque candidat retenu
|
||||||
|
NodeB -> NodeB: Indexers.SetAddr(key, &addrInfo)\nNudgeIt() → heartbeat immédiat
|
||||||
|
end
|
||||||
|
|
||||||
|
note over NodeB: Pool enrichi au-delà des seeds.\nScoring commence au premier heartbeat.\nSeeds restent IsSeed=true (stickiness).
|
||||||
|
|
||||||
|
@enduml
|
||||||
41
docs/diagrams/hb_failure_evict.puml
Normal file
41
docs/diagrams/hb_failure_evict.puml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
@startuml hb_failure_evict
|
||||||
|
title HeartbeatFailure → evictPeer → TriggerConsensus ou DHT replenish
|
||||||
|
|
||||||
|
participant "Node A" as NodeA
|
||||||
|
participant "Indexer X\n(défaillant)" as IX
|
||||||
|
participant "Indexer Y\n(voter)" as IY
|
||||||
|
participant "Indexer Z\n(voter)" as IZ
|
||||||
|
participant "DHT" as DHT
|
||||||
|
participant "Indexer NEW\n(candidat)" as INEW
|
||||||
|
|
||||||
|
note over NodeA: SendHeartbeat tick — Indexer X dans le pool
|
||||||
|
|
||||||
|
NodeA -> IX: stream.Encode(Heartbeat{...})
|
||||||
|
IX -->x NodeA: timeout / transport error
|
||||||
|
|
||||||
|
NodeA -> NodeA: HeartbeatFailure(h, proto, dir, addr_X, info_X, isIndexerHB=true, maxPool)
|
||||||
|
|
||||||
|
NodeA -> NodeA: evictPeer(dir, addr_X, id_X, proto)\n→ Streams.Delete(proto, &id_X)\n→ DeleteAddr(addr_X)\n→ DeleteScore(addr_X)\n→ voters = remaining AddrInfos
|
||||||
|
|
||||||
|
NodeA -> NodeA: poolSize = len(dir.GetAddrs())
|
||||||
|
|
||||||
|
alt poolSize == 0
|
||||||
|
NodeA -> NodeA: reconnectToSeeds()\n→ réinjecte IndexerAddresses (IsSeed=true)
|
||||||
|
alt seeds ajoutés
|
||||||
|
NodeA -> NodeA: need = maxPool\nNudgeIt() → tick immédiat
|
||||||
|
else aucun seed configuré ou seeds injoignables
|
||||||
|
NodeA -> NodeA: go retryUntilSeedResponds()\n(backoff 10s→5min, panic si IndexerAddresses vide)
|
||||||
|
end
|
||||||
|
else poolSize > 0 AND len(voters) > 0
|
||||||
|
NodeA -> NodeA: go TriggerConsensus(h, voters, need)
|
||||||
|
NodeA -> IY: stream GET → GetValue{Key: candidate_DID}
|
||||||
|
IY --> NodeA: GetResponse{Found, Records}
|
||||||
|
NodeA -> IZ: stream GET → GetValue{Key: candidate_DID}
|
||||||
|
IZ --> NodeA: GetResponse{Found, Records}
|
||||||
|
note over NodeA: Quorum check:\nfound=true AND lastSeen ≤ 2×interval\nAND lastScore ≥ 30\n→ majorité → admission INEW
|
||||||
|
NodeA -> NodeA: Indexers.SetAddr(addr_NEW, &INEW_AddrInfo)\nIndexers.SetScore(addr_NEW, Score{IsSeed:false})\nNudgeIt()
|
||||||
|
else poolSize > 0 AND len(voters) == 0
|
||||||
|
NodeA -> DHT: go replenishIndexersFromDHT(h, need)\nDiscoverIndexersFromDHT → SelectByFillRate\n→ add to Indexers Directory
|
||||||
|
end
|
||||||
|
|
||||||
|
@enduml
|
||||||
46
docs/diagrams/hb_last_indexer.puml
Normal file
46
docs/diagrams/hb_last_indexer.puml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
@startuml hb_last_indexer
|
||||||
|
title Protection last-indexer → reconnectToSeeds → retryUntilSeedResponds
|
||||||
|
|
||||||
|
participant "Node A" as NodeA
|
||||||
|
participant "Indexer LAST\n(seul restant)" as IL
|
||||||
|
participant "Seed Indexer\n(config)" as SEED
|
||||||
|
participant "DHT" as DHT
|
||||||
|
|
||||||
|
note over NodeA: Pool = 1 indexeur (LAST)\nIsSeed=false, score bas depuis longtemps
|
||||||
|
|
||||||
|
== Tentative d'éviction par score ==
|
||||||
|
NodeA -> NodeA: score < minScore\nAND TotalOnline ≥ 2×interval\nAND !IsSeed\nAND len(pool) > 1 ← FAUX : pool == 1
|
||||||
|
|
||||||
|
note over NodeA: Garde active : len(pool) == 1\n→ éviction par score BLOQUÉE\nLAST reste dans le pool
|
||||||
|
|
||||||
|
== Panne réseau (heartbeat fail) ==
|
||||||
|
NodeA -> IL: stream.Encode(Heartbeat{...})
|
||||||
|
IL -->x NodeA: timeout
|
||||||
|
|
||||||
|
NodeA -> NodeA: HeartbeatFailure → evictPeer(LAST)\npoolSize = 0
|
||||||
|
|
||||||
|
NodeA -> NodeA: reconnectToSeeds()\n→ parse IndexerAddresses (conf)\n→ SetAddr + SetScore(IsSeed=true) pour chaque seed
|
||||||
|
|
||||||
|
alt seeds ajoutés (IndexerAddresses non vide)
|
||||||
|
NodeA -> NodeA: NudgeIt() → tick immédiat
|
||||||
|
NodeA -> SEED: Heartbeat{...} (via SendHeartbeat nudge)
|
||||||
|
SEED --> NodeA: HeartbeatResponse{fillRate, ...}
|
||||||
|
note over NodeA: Pool rétabli via seeds.\nDHT proactive discovery reprend.
|
||||||
|
|
||||||
|
else IndexerAddresses vide
|
||||||
|
NodeA -> NodeA: go retryUntilSeedResponds()
|
||||||
|
note over NodeA: panic immédiat :\n"pool is empty and no seed indexers configured"\n→ arrêt du processus
|
||||||
|
end
|
||||||
|
|
||||||
|
== retryUntilSeedResponds (si seeds non répondants) ==
|
||||||
|
loop backoff exponentiel (10s → 20s → ... → 5min)
|
||||||
|
NodeA -> NodeA: time.Sleep(backoff)
|
||||||
|
NodeA -> NodeA: len(Indexers.GetAddrs()) > 0?\n→ oui : retour (quelqu'un a refillé)
|
||||||
|
NodeA -> NodeA: reconnectToSeeds()
|
||||||
|
alt pool > 0 après reconnect
|
||||||
|
NodeA -> NodeA: NudgeIt()\nDHT.Bootstrap(ctx, 15s)
|
||||||
|
note over NodeA: Sortie de la boucle.\nHeartbeat normal reprend.
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@enduml
|
||||||
6
go.mod
6
go.mod
@@ -3,10 +3,12 @@ module oc-discovery
|
|||||||
go 1.25.0
|
go 1.25.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260311072518-933b7147e908
|
||||||
|
github.com/ipfs/go-cid v0.6.0
|
||||||
github.com/libp2p/go-libp2p v0.47.0
|
github.com/libp2p/go-libp2p v0.47.0
|
||||||
github.com/libp2p/go-libp2p-record v0.3.1
|
github.com/libp2p/go-libp2p-record v0.3.1
|
||||||
github.com/multiformats/go-multiaddr v0.16.1
|
github.com/multiformats/go-multiaddr v0.16.1
|
||||||
|
github.com/multiformats/go-multihash v0.2.3
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
@@ -32,7 +34,6 @@ require (
|
|||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||||
github.com/huin/goupnp v1.3.0 // indirect
|
github.com/huin/goupnp v1.3.0 // indirect
|
||||||
github.com/ipfs/boxo v0.35.2 // indirect
|
github.com/ipfs/boxo v0.35.2 // indirect
|
||||||
github.com/ipfs/go-cid v0.6.0 // indirect
|
|
||||||
github.com/ipfs/go-datastore v0.9.0 // indirect
|
github.com/ipfs/go-datastore v0.9.0 // indirect
|
||||||
github.com/ipfs/go-log/v2 v2.9.1 // indirect
|
github.com/ipfs/go-log/v2 v2.9.1 // indirect
|
||||||
github.com/ipld/go-ipld-prime v0.21.0 // indirect
|
github.com/ipld/go-ipld-prime v0.21.0 // indirect
|
||||||
@@ -67,7 +68,6 @@ require (
|
|||||||
github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect
|
github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect
|
||||||
github.com/multiformats/go-multibase v0.2.0 // indirect
|
github.com/multiformats/go-multibase v0.2.0 // indirect
|
||||||
github.com/multiformats/go-multicodec v0.10.0 // indirect
|
github.com/multiformats/go-multicodec v0.10.0 // indirect
|
||||||
github.com/multiformats/go-multihash v0.2.3 // indirect
|
|
||||||
github.com/multiformats/go-multistream v0.6.1 // indirect
|
github.com/multiformats/go-multistream v0.6.1 // indirect
|
||||||
github.com/multiformats/go-varint v0.1.0 // indirect
|
github.com/multiformats/go-varint v0.1.0 // indirect
|
||||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
|
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
|
||||||
|
|||||||
14
go.sum
14
go.sum
@@ -1,17 +1,7 @@
|
|||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260224130821-ce8ef70516f7 h1:p9uJjMY+QkE4neA+xRmIRtAm9us94EKZqgajDdLOd0Y=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260224130821-ce8ef70516f7/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260226084851-959fce48ef6c h1:FTUu9tdEfib6J+fuc7e5wYTe++EIlB70bVNpOeFjnyU=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260226084851-959fce48ef6c/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260226085754-f4e2d8057df0 h1:lvrRF4ToIMl/5k1q4AiPEy6ycjwRtOaDhWnQ/LrW1ZA=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260226085754-f4e2d8057df0/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260226091217-cb3771c17a31 h1:hvkvJibS9NmImw73j79Ov5VpIYs4WbP4SYGlK/XO82Q=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260226091217-cb3771c17a31/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260302152414-542b0b73aba5 h1:h+Fkyj6cfwAirc0QGCBEkZSSrgcyThXswg7ytOLm948=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260302152414-542b0b73aba5/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260304143917-340f2a6301b7 h1:RZGV3ttkfoKIigUb7T+M5Kq+YtqW/td45EmNYeW5u8k=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260304143917-340f2a6301b7/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa h1:1wCpI4dwN1pj6MlpJ7/WifhHVHmCE4RU+9klwqgo/bk=
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa h1:1wCpI4dwN1pj6MlpJ7/WifhHVHmCE4RU+9klwqgo/bk=
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260304145747-e03a0d3dd0aa/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260311072518-933b7147e908 h1:1jz3xI/u2FzCG8phY7ShqADrmCj0mlrdjbdNUosSwgs=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260311072518-933b7147e908/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
||||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||||
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
|
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
|
||||||
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||||
|
|||||||
6
main.go
6
main.go
@@ -27,7 +27,7 @@ func main() {
|
|||||||
conf.GetConfig().PSKPath = o.GetStringDefault("PSK_PATH", "./psk/psk.key")
|
conf.GetConfig().PSKPath = o.GetStringDefault("PSK_PATH", "./psk/psk.key")
|
||||||
conf.GetConfig().NodeEndpointPort = o.GetInt64Default("NODE_ENDPOINT_PORT", 4001)
|
conf.GetConfig().NodeEndpointPort = o.GetInt64Default("NODE_ENDPOINT_PORT", 4001)
|
||||||
conf.GetConfig().IndexerAddresses = o.GetStringDefault("INDEXER_ADDRESSES", "")
|
conf.GetConfig().IndexerAddresses = o.GetStringDefault("INDEXER_ADDRESSES", "")
|
||||||
conf.GetConfig().NativeIndexerAddresses = o.GetStringDefault("NATIVE_INDEXER_ADDRESSES", "")
|
|
||||||
|
|
||||||
conf.GetConfig().PeerIDS = o.GetStringDefault("PEER_IDS", "")
|
conf.GetConfig().PeerIDS = o.GetStringDefault("PEER_IDS", "")
|
||||||
|
|
||||||
@@ -44,9 +44,7 @@ func main() {
|
|||||||
defer stop()
|
defer stop()
|
||||||
isNode := strings.Contains(conf.GetConfig().NodeMode, "node")
|
isNode := strings.Contains(conf.GetConfig().NodeMode, "node")
|
||||||
isIndexer := strings.Contains(conf.GetConfig().NodeMode, "indexer")
|
isIndexer := strings.Contains(conf.GetConfig().NodeMode, "indexer")
|
||||||
isNativeIndexer := strings.Contains(conf.GetConfig().NodeMode, "native-indexer")
|
if n, err := node.InitNode(isNode, isIndexer); err != nil {
|
||||||
|
|
||||||
if n, err := node.InitNode(isNode, isIndexer, isNativeIndexer); err != nil {
|
|
||||||
panic(err)
|
panic(err)
|
||||||
} else {
|
} else {
|
||||||
<-ctx.Done() // the only blocking point
|
<-ctx.Done() // the only blocking point
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PRIVATE KEY-----
|
|
||||||
MC4CAQAwBQYDK2VwBCIEIPc7D3Mgb1U2Ipyb/85hA4Ew7dC8zHDEuQYSjqzzRgLK
|
|
||||||
-----END PRIVATE KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PRIVATE KEY-----
|
|
||||||
MC4CAQAwBQYDK2VwBCIEIK2oBaOtGNchE09MBRtPd5oEOUcVUQG2ndym5wKExj7R
|
|
||||||
-----END PRIVATE KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PRIVATE KEY-----
|
|
||||||
MC4CAQAwBQYDK2VwBCIEIE58GDazCyF1jp796ivSmHiCepbkC8TpzliIaQ7eGEpu
|
|
||||||
-----END PRIVATE KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PRIVATE KEY-----
|
|
||||||
MC4CAQAwBQYDK2VwBCIEIAeX4O7ldwehRSnPkbzuE6csyo63vjvqAcNNujENOKUC
|
|
||||||
-----END PRIVATE KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PRIVATE KEY-----
|
|
||||||
MC4CAQAwBQYDK2VwBCIEIEkgqINXDLnxIJZs2LEK9O4vdsqk43dwbULGUE25AWuR
|
|
||||||
-----END PRIVATE KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PRIVATE KEY-----
|
|
||||||
MC4CAQAwBQYDK2VwBCIEIBcflxGlZYyUVJoExC94rHZbIyKMwZ+Oh7EDkb0qUlxd
|
|
||||||
-----END PRIVATE KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PUBLIC KEY-----
|
|
||||||
MCowBQYDK2VwAyEAEomuEQGmGsYVw35C6DB5tfY8LI8jm359ceAxRX8eQ0o=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PUBLIC KEY-----
|
|
||||||
MCowBQYDK2VwAyEAZ2nLJBL8a5opfa8nFeVj0SZToW8pl4+zgcSUkeZFRO4=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PUBLIC KEY-----
|
|
||||||
MCowBQYDK2VwAyEAIQVeSGwsjPjyepPTnzzYqVxIxviSEjZXU7C7zuNTui4=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PUBLIC KEY-----
|
|
||||||
MCowBQYDK2VwAyEAG95Ettl3jTi41HM8le1A9WDmOEq0ANEqpLF7zTZrfXA=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PUBLIC KEY-----
|
|
||||||
MCowBQYDK2VwAyEA/ymOIb0sJ0qCWrf3mKz7ACCvsMXLog/EK533JfNXZTM=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
-----BEGIN PUBLIC KEY-----
|
|
||||||
MCowBQYDK2VwAyEAZ4F3KqOp/5QrPdZGqqX6PYYEGd2snX4Q3AUt9XAG3v8=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
Reference in New Issue
Block a user