This commit is contained in:
mr
2026-03-11 19:29:39 +01:00
parent d0af40f4c7
commit 780a0c530d
13 changed files with 51 additions and 997 deletions

View File

@@ -368,7 +368,6 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
resp, rtt, err := sendHeartbeat(ctx, h, proto, ai.Info, hb, directory.Streams, interval*time.Second)
if err != nil { // Heartbeat fails
fmt.Println("EERR", err)
HeartbeatFailure(h, proto, directory, ai.Addr, ai.Info, isIndexerHB, maxPool, err)
continue
}
@@ -377,6 +376,7 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
// even if the indexer does not support bidirectional heartbeat (Fix 1).
if isIndexerHB && score != nil {
score.UptimeTracker.RecordHeartbeat()
score.UptimeTracker.ConsecutiveFails = 0 // reset on success
maxRTT := BaseRoundTrip * 10
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
@@ -442,11 +442,6 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
}
}
// Refresh local witness cache for indirect probing on future failure.
for _, w := range resp.Witnesses {
score.UpdateWitnessPool(w)
}
// Launch witness cross-check asynchronously (must not hold lock).
if len(resp.Witnesses) > 0 {
go queryWitnesses(h, ai.Info.ID.String(), resp.BornAt, resp.FillRate, resp.Witnesses, score)
@@ -550,16 +545,22 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
return
}
// Indirect probe: query cached witnesses before declaring the indexer dead.
// If a witness confirms it is alive, the failure is a local asymmetric
// link — not the indexer. Skip eviction; next tick will retry directly.
if len(score.WitnessPool) > 0 {
pool := append([]WitnessCacheEntry(nil), score.WitnessPool...)
if IndirectProbeIndexer(h, info.ID.String(), pool) {
// Indirect probing via other alive indexers:
// If other indexers in the pool are still responding, they act as implicit
// third-party witnesses confirming our connectivity is fine — the failed
// indexer is genuinely dead, evict immediately.
// If this is the last indexer, there is no third party. Retry up to 3 times
// (consecutive failures tracked in UptimeTracker) before declaring it dead.
if len(directory.GetAddrs()) <= 1 {
score.UptimeTracker.ConsecutiveFails++
if score.UptimeTracker.ConsecutiveFails < 3 {
logger.Warn().Str("peer", info.ID.String()).
Msg("[indirect] witness confirms indexer alive — asymmetric link, skipping eviction " + err.Error())
Int("attempt", score.UptimeTracker.ConsecutiveFails).
Msg("[indirect] last indexer failed, retrying before eviction")
return
}
logger.Warn().Str("peer", info.ID.String()).
Msg("[indirect] last indexer failed 3 times consecutively, evicting")
}
}
}