Change
This commit is contained in:
@@ -368,7 +368,6 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
|
||||
resp, rtt, err := sendHeartbeat(ctx, h, proto, ai.Info, hb, directory.Streams, interval*time.Second)
|
||||
if err != nil { // Heartbeat fails
|
||||
fmt.Println("EERR", err)
|
||||
HeartbeatFailure(h, proto, directory, ai.Addr, ai.Info, isIndexerHB, maxPool, err)
|
||||
continue
|
||||
}
|
||||
@@ -377,6 +376,7 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
// even if the indexer does not support bidirectional heartbeat (Fix 1).
|
||||
if isIndexerHB && score != nil {
|
||||
score.UptimeTracker.RecordHeartbeat()
|
||||
score.UptimeTracker.ConsecutiveFails = 0 // reset on success
|
||||
|
||||
maxRTT := BaseRoundTrip * 10
|
||||
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
|
||||
@@ -442,11 +442,6 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
}
|
||||
}
|
||||
|
||||
// Refresh local witness cache for indirect probing on future failure.
|
||||
for _, w := range resp.Witnesses {
|
||||
score.UpdateWitnessPool(w)
|
||||
}
|
||||
|
||||
// Launch witness cross-check asynchronously (must not hold lock).
|
||||
if len(resp.Witnesses) > 0 {
|
||||
go queryWitnesses(h, ai.Info.ID.String(), resp.BornAt, resp.FillRate, resp.Witnesses, score)
|
||||
@@ -550,16 +545,22 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
||||
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
|
||||
return
|
||||
}
|
||||
// Indirect probe: query cached witnesses before declaring the indexer dead.
|
||||
// If a witness confirms it is alive, the failure is a local asymmetric
|
||||
// link — not the indexer. Skip eviction; next tick will retry directly.
|
||||
if len(score.WitnessPool) > 0 {
|
||||
pool := append([]WitnessCacheEntry(nil), score.WitnessPool...)
|
||||
if IndirectProbeIndexer(h, info.ID.String(), pool) {
|
||||
// Indirect probing via other alive indexers:
|
||||
// If other indexers in the pool are still responding, they act as implicit
|
||||
// third-party witnesses confirming our connectivity is fine — the failed
|
||||
// indexer is genuinely dead, evict immediately.
|
||||
// If this is the last indexer, there is no third party. Tolerate up to 2
|
||||
// consecutive failures (tracked in UptimeTracker); evict on the 3rd.
|
||||
if len(directory.GetAddrs()) <= 1 {
|
||||
score.UptimeTracker.ConsecutiveFails++
|
||||
if score.UptimeTracker.ConsecutiveFails < 3 {
|
||||
logger.Warn().Str("peer", info.ID.String()).
|
||||
Msg("[indirect] witness confirms indexer alive — asymmetric link, skipping eviction " + err.Error())
|
||||
Int("attempt", score.UptimeTracker.ConsecutiveFails).
|
||||
Msg("[indirect] last indexer failed, retrying before eviction")
|
||||
return
|
||||
}
|
||||
logger.Warn().Str("peer", info.ID.String()).
|
||||
Msg("[indirect] last indexer failed 3 times consecutively, evicting")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user