Simple Architecture
This commit is contained in:
@@ -4,55 +4,61 @@ title Node Initialization — Peer A (InitNode)
|
||||
participant "main (Peer A)" as MainA
|
||||
participant "Node A" as NodeA
|
||||
participant "libp2p (Peer A)" as libp2pA
|
||||
participant "ConnectionGater A" as GaterA
|
||||
participant "DB Peer A (oc-lib)" as DBA
|
||||
participant "NATS A" as NATSA
|
||||
participant "Indexer (partagé)" as IndexerA
|
||||
participant "Indexer (shared)" as IndexerA
|
||||
participant "DHT A" as DHTA
|
||||
participant "StreamService A" as StreamA
|
||||
participant "PubSubService A" as PubSubA
|
||||
|
||||
MainA -> NodeA: InitNode(isNode, isIndexer, isNativeIndexer)
|
||||
MainA -> NodeA: InitNode(isNode=true, isIndexer=false)
|
||||
|
||||
NodeA -> NodeA: LoadKeyFromFilePrivate() → priv
|
||||
NodeA -> NodeA: LoadPSKFromFile() → psk
|
||||
|
||||
NodeA -> libp2pA: New(PrivateNetwork(psk), Identity(priv), ListenAddr:4001)
|
||||
NodeA -> GaterA: newOCConnectionGater(nil)
|
||||
NodeA -> libp2pA: New(\n PrivateNetwork(psk),\n Identity(priv),\n ListenAddr: tcp/4001,\n ConnectionGater(gater)\n)
|
||||
libp2pA --> NodeA: host A (PeerID_A)
|
||||
NodeA -> GaterA: gater.host = host A
|
||||
|
||||
note over NodeA: isNode == true
|
||||
note over GaterA: InterceptSecured (inbound):\n1. DB lookup by peer_id\n → BLACKLIST : refuse\n → found : accept\n2. Not found → DHT sequential check\n (transport-error fallthrough only)
|
||||
|
||||
NodeA -> libp2pA: NewGossipSub(ctx, host)
|
||||
libp2pA --> NodeA: ps (GossipSub)
|
||||
NodeA -> libp2pA: SetStreamHandler(/opencloud/probe/1.0, HandleBandwidthProbe)
|
||||
NodeA -> libp2pA: SetStreamHandler(/opencloud/witness/1.0, HandleWitnessQuery)
|
||||
|
||||
NodeA -> IndexerA: ConnectToIndexers → SendHeartbeat /opencloud/heartbeat/1.0
|
||||
note over IndexerA: Heartbeat long-lived established\nQuality Score evaluated (bw + uptime + diversity)
|
||||
IndexerA --> NodeA: OK
|
||||
NodeA -> libp2pA: NewGossipSub(ctx, host) → ps (GossipSub)
|
||||
|
||||
NodeA -> NodeA: buildRecord() closure\n→ signs fresh PeerRecord (expiry=now+2min)\n embedded in each heartbeat tick
|
||||
|
||||
NodeA -> IndexerA: ConnectToIndexers(host, minIndexer=1, maxIndexer=5, buildRecord)
|
||||
note over IndexerA: Reads IndexerAddresses from config\nAdds seeds → Indexers Directory (IsSeed=true)\nLaunches SendHeartbeat goroutine (20s ticker)
|
||||
|
||||
IndexerA -> DHTA: proactive DHT discovery (after 5s warmup)\ninitNodeDHT(h, seeds)\nDiscoverIndexersFromDHT → SelectByFillRate\n→ add to Indexers Directory + NudgeIt()
|
||||
|
||||
NodeA -> NodeA: claimInfo(name, hostname)
|
||||
NodeA -> IndexerA: TempStream /opencloud/record/publish/1.0
|
||||
NodeA -> IndexerA: stream.Encode(PeerRecord A signé)
|
||||
IndexerA -> IndexerA: DHT.PutValue("/node/"+DID_A, record)
|
||||
NodeA -> IndexerA: stream.Encode(Signed PeerRecord A)
|
||||
IndexerA -> DHTA: PutValue("/node/"+DID_A, record)
|
||||
|
||||
NodeA -> DBA: DB(PEER).Search(SELF)
|
||||
DBA --> NodeA: local peer A (or new generated UUID)
|
||||
|
||||
NodeA -> NodeA: StartGC(30s) — GarbageCollector on StreamRecords
|
||||
NodeA -> NodeA: StartGC(30s)
|
||||
|
||||
NodeA -> StreamA: InitStream(ctx, host, PeerID_A, 1000, nodeA)
|
||||
StreamA -> StreamA: SetStreamHandler(heartbeat/partner, search, planner, ...)
|
||||
StreamA -> DBA: Search(PEER, PARTNER) → partner list
|
||||
DBA --> StreamA: Heartbeat long-lived established to partners
|
||||
StreamA -> StreamA: SetStreamHandler(resource/search, create, update,\n delete, planner, verify, considers)
|
||||
StreamA --> NodeA: StreamService A
|
||||
|
||||
NodeA -> PubSubA: InitPubSub(ctx, host, ps, nodeA, streamA)
|
||||
PubSubA -> PubSubA: subscribeEvents(PB_SEARCH, timeout=-1)
|
||||
PubSubA --> NodeA: PubSubService A
|
||||
|
||||
NodeA -> NodeA: SubscribeToSearch(ps, callback) (search global topic for resources)
|
||||
note over NodeA: callback: GetPeerRecord(evt.From)\n→ StreamService.SendResponse
|
||||
NodeA -> NodeA: SubscribeToSearch(ps, callback)
|
||||
note over NodeA: callback: if evt.From != self\n → GetPeerRecord(evt.From)\n → StreamService.SendResponse
|
||||
|
||||
NodeA -> NATSA: ListenNATS(nodeA)
|
||||
note over NATSA: Subscribes handlers:\nCREATE_RESOURCE, PROPALGATION_EVENT
|
||||
note over NATSA: Subscribes:\nCREATE_RESOURCE → partner on-demand\nPROPALGATION_EVENT → resource propagation
|
||||
|
||||
NodeA --> MainA: *Node A is ready
|
||||
|
||||
note over NodeA,IndexerA: SendHeartbeat goroutine (permanent, 20s ticker):\nNode → Indexer : Heartbeat{name, PeerID, indexersBinded, need, challenges?, record}\nIndexer → Node : HeartbeatResponse{fillRate, challenges, suggestions, witnesses, suggestMigrate}\nScore updated (7 dimensions), pool managed autonomously
|
||||
|
||||
@enduml
|
||||
|
||||
@@ -1,49 +1,59 @@
|
||||
@startuml indexer_heartbeat
|
||||
title Indexer — Heartbeat node → indexer (score on 5 metrics)
|
||||
title Heartbeat bidirectionnel node → indexeur (scoring 7 dimensions + challenges)
|
||||
|
||||
participant "Node A" as NodeA
|
||||
participant "Node B" as NodeB
|
||||
participant "IndexerService" as Indexer
|
||||
|
||||
note over NodeA,NodeB: Every node ticks every 20s (SendHeartbeat)
|
||||
note over NodeA,NodeB: SendHeartbeat goroutine — tick every 20s
|
||||
|
||||
par Node A heartbeat
|
||||
NodeA -> Indexer: NewStream /opencloud/heartbeat/1.0
|
||||
NodeA -> Indexer: stream.Encode(Heartbeat{Name, PeerID_A, IndexersBinded, Record})
|
||||
== Tick Node A ==
|
||||
|
||||
Indexer -> Indexer: CheckHeartbeat(host, stream, dec, streams, mu, maxNodes)
|
||||
note over Indexer: len(h.Network().Peers()) >= maxNodes → reject
|
||||
NodeA -> Indexer: NewStream /opencloud/heartbeat/1.0\n(long-lived, réutilisé aux ticks suivants)
|
||||
NodeA -> Indexer: stream.Encode(Heartbeat{\n name, PeerID_A, timestamp,\n indexersBinded: [addr1, addr2],\n need: maxPool - len(pool),\n challenges: [PeerID_A, PeerID_B], ← batch (tous les 1-10 HBs)\n challengeDID: "uuid-did-A", ← DHT challenge (tous les 5 batches)\n record: SignedPeerRecord_A ← expiry=now+2min\n})
|
||||
|
||||
Indexer -> Indexer: getBandwidthChallengeRate(host, remotePeer, 512-2048B)
|
||||
Indexer -> Indexer: CheckHeartbeat(stream, maxNodes)\n→ len(Peers()) >= maxNodes → reject
|
||||
|
||||
Indexer -> Indexer: getOwnDiversityRate(host)\\nh.Network().Peers() + Peerstore.Addrs()\\n→ ratio /24 subnets distincts
|
||||
Indexer -> Indexer: HandleHeartbeat → UptimeTracker.RecordHeartbeat()\n→ gap ≤ 2×interval : TotalOnline += gap
|
||||
|
||||
Indexer -> Indexer: fillRate = len(h.Network().Peers()) / maxNodes
|
||||
Indexer -> Indexer: Republish PeerRecord A to DHT\nDHT.PutValue("/node/"+DID_A, record_A)
|
||||
|
||||
Indexer -> Indexer: Retrieve existing UptimeTracker\\noldTracker.RecordHeartbeat()\\n→ TotalOnline += gap si gap ≤ 120s\\nuptimeRatio = TotalOnline / time.Since(FirstSeen)
|
||||
== Réponse indexeur → node A ==
|
||||
|
||||
Indexer -> Indexer: ComputeIndexerScore(\\n uptimeRatio, bpms, diversity,\\n latencyScore, fillRate\\n)\\nScore = (0.20×U + 0.20×B + 0.20×D + 0.15×L + 0.25×F) × 100
|
||||
Indexer -> Indexer: BuildHeartbeatResponse(remotePeer=A, need, challenges, challengeDID)\n\nfillRate = connected_nodes / MaxNodesConn()\npeerCount = connected_nodes\nmaxNodes = MaxNodesConn()\nbornAt = time of indexer startup\n\nChallenges: pour chaque PeerID challengé\n found = PeerID dans StreamRecords[ProtocolHeartbeat]?\n lastSeen = HeartbeatStream.UptimeTracker.LastSeen\n\nDHT challenge:\n DHT.GetValue("/node/"+challengeDID, timeout=3s)\n → dhtFound + dhtPayload\n\nWitnesses: jusqu'à 3 AddrInfos de nœuds connectés\n (adresses connues dans Peerstore)\n\nSuggestions: jusqu'à `need` indexeurs depuis dhtCache\n (refresh asynchrone 2min, SelectByFillRate)\n\nSuggestMigrate: fillRate > 80%\n ET node dans offload.inBatch (batch ≤ 5, grace 3×HB)
|
||||
|
||||
Indexer -> Indexer: dynamicMinScore(age)\\n= 20 + 60×(hours/24), max 80
|
||||
Indexer --> NodeA: stream.Encode(HeartbeatResponse{\n fillRate, peerCount, maxNodes, bornAt,\n challenges, dhtFound, dhtPayload,\n witnesses, suggestions, suggestMigrate\n})
|
||||
|
||||
alt Score A < dynamicMinScore(age)
|
||||
Indexer -> NodeA: (close stream — "not enough trusting value")
|
||||
else Score A >= dynamicMinScore(age)
|
||||
Indexer -> Indexer: streams[PeerID_A].HeartbeatStream = hb.Stream\\nstreams[PeerID_A].HeartbeatStream.UptimeTracker = oldTracker\\nstreams[PeerID_A].LastScore = hb.Score
|
||||
note over Indexer: AfterHeartbeat → republish PeerRecord on DHT
|
||||
== Traitement score côté Node A ==
|
||||
|
||||
NodeA -> NodeA: score = ensureScore(Indexers, addr_indexer)\nscore.UptimeTracker.RecordHeartbeat()\n\nlatencyScore = max(0, 1 - RTT / (BaseRoundTrip × 10))\n\nBornAt stability:\n bornAt changed? → score.bornAtChanges++\n\nfillConsistency:\n expected = peerCount / maxNodes\n |expected - fillRate| < 10% → fillConsistent++\n\nChallenge PeerID (ground truth own PeerID):\n found=true AND lastSeen < 2×interval → challengeCorrect++\n\nDHT challenge:\n dhtFound=true → dhtSuccess++\n\nWitness query (async):\n go queryWitnesses(h, indexerID, bornAt, fillRate, witnesses, score)
|
||||
|
||||
NodeA -> NodeA: score.Score = ComputeNodeSideScore(latencyScore)\n\nScore = (\n 0.20 × uptimeRatio\n+ 0.20 × challengeAccuracy\n+ 0.15 × latencyScore\n+ 0.10 × fillScore ← 1 - fillRate\n+ 0.10 × fillConsistency\n+ 0.15 × witnessConsistency\n+ 0.10 × dhtSuccessRate\n) × 100 × bornAtPenalty\n\nbornAtPenalty = max(0, 1 - 0.30 × bornAtChanges)\nminScore = clamp(20 + 60 × (age.Hours/24), 20, 80)
|
||||
|
||||
alt score < minScore\n AND TotalOnline ≥ 2×interval\n AND !IsSeed\n AND len(pool) > 1
|
||||
NodeA -> NodeA: evictPeer(dir, addr, id, proto)\n→ delete Addr + Score + Stream\ngo TriggerConsensus(h, voters, need)\n ou replenishIndexersFromDHT(h, need)
|
||||
end
|
||||
|
||||
alt resp.SuggestMigrate == true AND nonSeedCount >= MinIndexer
|
||||
alt IsSeed
|
||||
NodeA -> NodeA: score.IsSeed = false\n(de-stickied — score eviction maintenant possible)
|
||||
else !IsSeed
|
||||
NodeA -> NodeA: evictPeer → migration acceptée
|
||||
end
|
||||
end
|
||||
|
||||
else Node B heartbeat
|
||||
NodeB -> Indexer: NewStream /opencloud/heartbeat/1.0
|
||||
NodeB -> Indexer: stream.Encode(Heartbeat{Name, PeerID_B, IndexersBinded, Record})
|
||||
alt len(resp.Suggestions) > 0
|
||||
NodeA -> NodeA: handleSuggestions(dir, indexerID, suggestions)\n→ inconnus ajoutés à Indexers Directory\n→ NudgeIt() si ajout effectif
|
||||
end
|
||||
|
||||
Indexer -> Indexer: CheckHeartbeat → getBandwidthChallengeRate\\n→ getOwnDiversityRate → ComputeIndexerScore(5 composants)
|
||||
== Tick Node B (concurrent) ==
|
||||
|
||||
alt Score B >= dynamicMinScore(age)
|
||||
Indexer -> Indexer: streams[PeerID_B] subscribed + LastScore updated
|
||||
end
|
||||
end par
|
||||
NodeB -> Indexer: stream.Encode(Heartbeat{PeerID_B, ...})
|
||||
Indexer -> Indexer: CheckHeartbeat → UptimeTracker → BuildHeartbeatResponse
|
||||
Indexer --> NodeB: HeartbeatResponse{...}
|
||||
|
||||
note over Indexer: GC ticker 30s — gc()\\nnow.After(Expiry) où Expiry = lastHBTime + 2min\\n→ AfterDelete(pid, name, did)
|
||||
== GC côté Indexeur ==
|
||||
|
||||
note over Indexer: GC ticker 30s — gc()\nnow.After(Expiry) où Expiry = lastHBTime + 2min\n→ AfterDelete(pid, name, did) hors lock\n→ publishNameEvent(NameIndexDelete, ...)\nFillRate recalculé automatiquement
|
||||
|
||||
@enduml
|
||||
|
||||
@@ -1,42 +1,38 @@
|
||||
@startuml
|
||||
title NATS — CREATE_RESOURCE : Peer A Create/Update Peer B & establishing stream
|
||||
title NATS — CREATE_RESOURCE : Peer A crée/met à jour Peer B (connexion on-demand)
|
||||
|
||||
participant "App Peer A (oc-api)" as AppA
|
||||
participant "NATS A" as NATSA
|
||||
participant "Node A" as NodeA
|
||||
participant "StreamService A" as StreamA
|
||||
participant "Node B" as NodeB
|
||||
participant "StreamService B" as StreamB
|
||||
participant "DB Peer A (oc-lib)" as DBA
|
||||
|
||||
note over AppA: Peer B is discovered\n(via indexer or manually)
|
||||
note over AppA: Peer B est découvert\n(via indexeur ou manuellement)
|
||||
|
||||
AppA -> NATSA: Publish(CREATE_RESOURCE, {\n FromApp:"oc-api",\n Datatype:PEER,\n Payload: Peer B {StreamAddress_B, Relation:PARTNER}\n})
|
||||
|
||||
NATSA -> NodeA: ListenNATS callback → CREATE_RESOURCE
|
||||
|
||||
NodeA -> NodeA: if event from self? → No, continue
|
||||
NodeA -> NodeA: json.Unmarshal(payload) → peer.Peer B
|
||||
NodeA -> NodeA: if peer == self ? → skip
|
||||
|
||||
alt peer B.Relation == PARTNER
|
||||
NodeA -> StreamA: ConnectToPartner(B.StreamAddress)
|
||||
StreamA -> NodeB: Connect (libp2p)
|
||||
StreamA -> NodeB: NewStream /opencloud/resource/heartbeat/partner/1.0
|
||||
StreamA -> NodeB: json.Encode(Heartbeat{Name_A, DID_A, PeerID_A})
|
||||
NodeA -> StreamA: ToPartnerPublishEvent(ctx, PB_CREATE, PEER, payload)
|
||||
note over StreamA: Pas de heartbeat permanent.\nConnexion on-demand : ouvre un stream,\nenvoie l'événement, ferme ou laisse expirer.
|
||||
StreamA -> StreamA: PublishCommon(PEER, user, B.PeerID,\n ProtocolUpdateResource, selfPeerJSON)
|
||||
StreamA -> NodeB: TempStream /opencloud/resource/update/1.0\n(TTL court, fermé après envoi)
|
||||
StreamA -> NodeB: stream.Encode(Event{from, datatype, payload})
|
||||
NodeB --> StreamA: (traitement applicatif)
|
||||
|
||||
NodeB -> StreamB: HandlePartnerHeartbeat(stream)
|
||||
StreamB -> StreamB: CheckHeartbeat → bandwidth challenge
|
||||
StreamB -> StreamB: streams[ProtocolHeartbeatPartner][PeerID_A] = {DID_A, Expiry=now+10s}
|
||||
|
||||
StreamA -> StreamA: streams[ProtocolHeartbeatPartner][PeerID_B] = {DID_B, Expiry=now+10s}
|
||||
note over StreamA,StreamB: Stream partner long-lived établi\nbidirectionnel
|
||||
else peer B.Relation != PARTNER (revoke / blacklist)
|
||||
note over NodeA: Close all streams to Peer B
|
||||
loop For every stream to Peer B
|
||||
else peer B.Relation != PARTNER (révocation / blacklist)
|
||||
note over NodeA: Ferme tous les streams existants vers Peer B
|
||||
loop Pour chaque stream actif vers PeerID_B
|
||||
NodeA -> StreamA: streams[proto][PeerID_B].Stream.Close()
|
||||
NodeA -> StreamA: delete(streams[proto], PeerID_B)
|
||||
end
|
||||
end
|
||||
NodeA -> DBA: (no direct write — only the source app manages the DB)
|
||||
|
||||
NodeA -> DBA: (pas d'écriture directe — seule l'app source gère la DB)
|
||||
|
||||
@enduml
|
||||
|
||||
@@ -1,42 +1,35 @@
|
||||
@startuml 25_failure_node_gc
|
||||
title F7 — Crash nœud → GC indexeur + AfterDelete
|
||||
|
||||
participant "Node\\n(crashé)" as N
|
||||
participant "Node\n(crashé)" as N
|
||||
participant "Indexer A" as IA
|
||||
participant "Indexer B" as IB
|
||||
participant "Native A" as NA
|
||||
|
||||
note over N, NA: État nominal : N heartbeatait vers IA et IB
|
||||
note over N, IB: État nominal : N heartbeatait vers IA et IB
|
||||
|
||||
== Crash Node ==
|
||||
N ->x IA: stream reset (heartbeat coupé)
|
||||
N ->x IB: stream reset (heartbeat coupé)
|
||||
|
||||
== GC côté Indexer A ==
|
||||
note over IA: HandleHeartbeat : stream reset détecté\\nStreamRecords[ProtocolHB][N].LastSeen figé
|
||||
note over IA: HandleHeartbeat : stream reset détecté\nStreamRecords[ProtocolHeartbeat][N].Expiry figé
|
||||
|
||||
loop ticker GC (30s) — StartGC(30*time.Second)
|
||||
IA -> IA: gc()\\nnow.After(Expiry) où Expiry = lastHBTime + 2min\\n→ si 2min sans heartbeat → éviction
|
||||
IA -> IA: delete(StreamRecords[ProtocolHB][N])\\nAfterDelete(N, name, did) appelé hors lock
|
||||
note over IA: N retiré du registre vivant.\\nFillRate recalculé (n-1 / maxNodes).
|
||||
IA -> IA: gc()\nnow.After(Expiry) où Expiry = lastHBTime + 2min\n→ si 2min sans heartbeat → éviction
|
||||
IA -> IA: delete(StreamRecords[ProtocolHeartbeat][N])\nAfterDelete(N, name, did) appelé hors lock
|
||||
note over IA: N retiré du registre vivant.\nFillRate recalculé : (n-1) / MaxNodesConn()
|
||||
end
|
||||
|
||||
== Impact sur le scoring / fill rate ==
|
||||
note over IA: FillRate diminue\\nProchain subscribe vers NA inclura FillRate mis à jour
|
||||
== Impact fill rate ==
|
||||
note over IA: FillRate diminue.\nProchain BuildHeartbeatResponse\ninclura FillRate mis à jour.\nSi fillRate revient < 80% :\n→ offload.inBatch et alreadyTried réinitialisés.
|
||||
|
||||
IA -> NA: /opencloud/native/subscribe/1.0\\nIndexerRegistration{FillRate: 0.3} /' était 0.5 '/
|
||||
|
||||
NA -> NA: liveIndexerEntry[IA].FillRate = 0.3\\nPriorité de routage recalculée : w(0.3) = 0.21
|
||||
|
||||
== Impact sur la Phase 2 (indexerLivenessVote) ==
|
||||
note over IA: Si un autre nœud demande consensus,\\nN n'est plus dans StreamRecords.\\nN absent de la réponse Alive[].
|
||||
|
||||
note over IB: Même GC effectué côté IB.\\nN retiré de StreamRecords[ProtocolHB].
|
||||
== GC côté Indexer B ==
|
||||
note over IB: Même GC effectué.\nN retiré de StreamRecords[ProtocolHeartbeat].
|
||||
|
||||
== Reconnexion éventuelle du nœud ==
|
||||
N -> N: redémarrage
|
||||
N -> IA: SendHeartbeat /opencloud/heartbeat/1.0\\nHeartbeat{Score: X, IndexersBinded: 2}
|
||||
IA -> IA: HandleHeartbeat → nouveau UptimeTracker(FirstSeen=now)\\nStreamRecords[ProtocolHB][N] recréé
|
||||
note over IA: N de retour avec FirstSeen frais.\\ndynamicMinScore élevé tant que age < 24h.
|
||||
N -> IA: SendHeartbeat /opencloud/heartbeat/1.0\nHeartbeat{name, PeerID_N, IndexersBinded, need, record}
|
||||
IA -> IA: HandleHeartbeat → UptimeTracker(FirstSeen=now)\nStreamRecords[ProtocolHeartbeat][N] recréé\nRepublish PeerRecord N dans DHT
|
||||
note over IA: N de retour avec FirstSeen frais.\ndynamicMinScore élevé tant que age < 24h.\n(phase de grâce : 2 ticks avant scoring)
|
||||
|
||||
@enduml
|
||||
|
||||
@@ -3,73 +3,86 @@
|
||||
Tous les fichiers sont au format [PlantUML](https://plantuml.com/).
|
||||
Rendu possible via VS Code (extension PlantUML), IntelliJ, ou [plantuml.com/plantuml](https://www.plantuml.com/plantuml/uml/).
|
||||
|
||||
## Diagrammes de séquence (flux internes)
|
||||
> **Note :** Les diagrammes 06, 07, 12, 14–24 et plusieurs protocoles ci-dessous
|
||||
> concernaient l'architecture à 3 niveaux (node → indexer → native indexer),
|
||||
> supprimée dans la branche `feature/no_native_consortium`. Ces fichiers sont
|
||||
> conservés à titre historique. Les diagrammes actifs sont indiqués ci-dessous.
|
||||
|
||||
## Diagrammes actifs (architecture 2 niveaux)
|
||||
|
||||
### Séquences principales
|
||||
|
||||
| Fichier | Description |
|
||||
|---------|-------------|
|
||||
| `01_node_init.puml` | Initialisation complète d'un Node (libp2p host, GossipSub, indexers, StreamService, PubSubService, NATS) |
|
||||
| `02_node_claim.puml` | Enregistrement du nœud auprès des indexeurs (`claimInfo` + `publishPeerRecord`) |
|
||||
| `03_indexer_heartbeat.puml` | Protocole heartbeat avec score 5 composants (U/B/D/L/F), UptimeTracker, dynamicMinScore |
|
||||
| `04_indexer_publish.puml` | Publication d'un `PeerRecord` vers l'indexeur → DHT |
|
||||
| `05_indexer_get.puml` | Résolution d'un pair via l'indexeur (`GetPeerRecord` + `handleNodeGet` + DHT) |
|
||||
| `06_native_registration.puml` | Enregistrement d'un indexeur auprès du Native (FillRate, signature, TTL 90s, unsubscribe) |
|
||||
| `07_native_get_consensus.puml` | `ConnectToNatives` : fetch pool + Phase 1 (clientSideConsensus) + Phase 2 (indexerLivenessVote) |
|
||||
| `08_nats_create_resource.puml` | Handler NATS `CREATE_RESOURCE` : connexion/déconnexion d'un partner |
|
||||
| `01_node_init.puml` | Initialisation d'un Node : libp2p host + PSK + ConnectionGater + ConnectToIndexers + SendHeartbeat + DHT proactive |
|
||||
| `02_node_claim.puml` | Enregistrement du nœud : `claimInfo` + `publishPeerRecord` → indexeurs → DHT |
|
||||
| `03_indexer_heartbeat.puml` | Protocole heartbeat bidirectionnel : challenges PeerID + DHT + witness, scoring 7 dimensions, suggestions, SuggestMigrate |
|
||||
| `04_indexer_publish.puml` | Publication d'un `PeerRecord` vers l'indexeur → DHT (PutValue /node, /name, /pid) |
|
||||
| `05_indexer_get.puml` | Résolution d'un pair : `GetPeerRecord` → indexeur → DHT si absent local |
|
||||
| `08_nats_create_resource.puml` | Handler NATS `CREATE_RESOURCE` : propagation partenaires on-demand |
|
||||
| `09_nats_propagation.puml` | Handler NATS `PROPALGATION_EVENT` : delete, considers, planner, search |
|
||||
| `10_pubsub_search.puml` | Recherche gossip globale (type `"all"`) via GossipSub |
|
||||
| `10_pubsub_search.puml` | Recherche gossip globale (GossipSub /opencloud/search/1.0) |
|
||||
| `11_stream_search.puml` | Recherche directe par stream (type `"known"` ou `"partner"`) |
|
||||
| `12_partner_heartbeat.puml` | Heartbeat partner + propagation CRUD vers les partenaires |
|
||||
| `13_planner_flow.puml` | Session planner (ouverture, échange, fermeture) |
|
||||
| `14_native_offload_gc.puml` | Boucles background du Native Indexer (offload, DHT refresh, GC) |
|
||||
|
||||
## Diagrammes de topologie et flux de panne
|
||||
|
||||
### Configurations réseau
|
||||
### Résilience et pool management
|
||||
|
||||
| Fichier | Description |
|
||||
|---------|-------------|
|
||||
| `15_archi_config_nominale.puml` | C1 — Topologie nominale : 2 natifs · 2 indexeurs · 2 nœuds, tous flux |
|
||||
| `16_archi_config_seed.puml` | C2 — Mode seed sans natif : indexeurs à AdmittedAt=0, risque D20 actif |
|
||||
| `hb_failure_evict.puml` | HeartbeatFailure → evictPeer → TriggerConsensus ou DHT replenish |
|
||||
| `hb_last_indexer.puml` | Protection last-indexer → reconnectToSeeds → retryUntilSeedResponds |
|
||||
| `dht_discovery.puml` | Découverte proactive DHT : Provide/FindProviders, SelectByFillRate, dhtCache |
|
||||
| `connection_gater.puml` | ConnectionGater : DB blacklist → DHT sequential check (transport-error fallthrough) |
|
||||
|
||||
### Flux de démarrage
|
||||
## Diagrammes historiques (architecture 3 niveaux — obsolètes)
|
||||
|
||||
Ces fichiers documentent l'ancienne architecture. Ils ne correspondent plus
|
||||
au code en production.
|
||||
|
||||
| Fichier | Description |
|
||||
|---------|-------------|
|
||||
| `17_startup_consensus_phase1_phase2.puml` | Démarrage nominal : Phase 1 (admission native) + Phase 2 (liveness vote) |
|
||||
| `18_startup_seed_discovers_native.puml` | Upgrade seed → nominal : goroutine async découvre un natif via l'indexeur |
|
||||
| `06_native_registration.puml` | Enregistrement d'un indexeur auprès du Native (supprimé) |
|
||||
| `07_native_get_consensus.puml` | `ConnectToNatives` : fetch pool + Phase 1 + Phase 2 (supprimé) |
|
||||
| `12_partner_heartbeat.puml` | Heartbeat partner permanent (supprimé — connexions on-demand) |
|
||||
| `14_native_offload_gc.puml` | Boucles background Native Indexer (supprimé) |
|
||||
| `15_archi_config_nominale.puml` | Topologie nominale avec natifs (obsolète) |
|
||||
| `16_archi_config_seed.puml` | Mode seed sans natif (obsolète) |
|
||||
| `17_startup_consensus_phase1_phase2.puml` | Démarrage avec consensus natifs (supprimé) |
|
||||
| `18_startup_seed_discovers_native.puml` | Upgrade seed → native (supprimé) |
|
||||
| `19_failure_indexer_crash.puml` | F1 — replenish depuis natif (supprimé) |
|
||||
| `20_failure_both_indexers_selfdelegate.puml` | F2 — IsSelfFallback native (supprimé) |
|
||||
| `21_failure_native_one_down.puml` | F3 — panne 1 natif (supprimé) |
|
||||
| `22_failure_both_natives.puml` | F4 — panne 2 natifs (supprimé) |
|
||||
| `23_failure_native_plus_indexer.puml` | F5 — panne combinée natif + indexeur (supprimé) |
|
||||
| `24_failure_retry_lost_native.puml` | F6 — retryLostNative (supprimé) |
|
||||
| `25_failure_node_gc.puml` | F7 — GC nœud côté indexeur (toujours valide) |
|
||||
|
||||
### Flux de panne
|
||||
|
||||
| Fichier | Code | Description |
|
||||
|---------|------|-------------|
|
||||
| `19_failure_indexer_crash.puml` | F1 | Panne 1 indexeur → replenish depuis natif → IC admis |
|
||||
| `20_failure_both_indexers_selfdelegate.puml` | F2 | Panne 2 indexeurs → natif `IsSelfFallback=true`, runOffloadLoop |
|
||||
| `21_failure_native_one_down.puml` | F3 | Panne 1 natif → quorum 1/1 suffisant, mode dégradé |
|
||||
| `22_failure_both_natives.puml` | F4 | Panne 2 natifs → fallback pool pré-validé, retryLostNative |
|
||||
| `23_failure_native_plus_indexer.puml` | F5 | Panne combinée : 1 natif + 1 indexeur → double replenish |
|
||||
| `24_failure_retry_lost_native.puml` | F6 | Panne réseau transitoire → retryLostNative (30s ticker) |
|
||||
| `25_failure_node_gc.puml` | F7 | Crash nœud → GC indexeur (120s), AfterDelete, fill rate recalculé |
|
||||
|
||||
## Protocoles libp2p utilisés (référence complète)
|
||||
## Protocoles libp2p actifs
|
||||
|
||||
| Protocole | Description |
|
||||
|-----------|-------------|
|
||||
| `/opencloud/heartbeat/1.0` | Heartbeat universel : node→indexeur, indexeur→native, native→native (long-lived) |
|
||||
| `/opencloud/heartbeat/1.0` | Heartbeat bidirectionnel node→indexeur (long-lived) |
|
||||
| `/opencloud/probe/1.0` | Sonde de bande passante (echo, mesure latence + débit) |
|
||||
| `/opencloud/resource/heartbeat/partner/1.0` | Heartbeat node ↔ partner (long-lived) |
|
||||
| `/opencloud/witness/1.0` | Requête témoin : "quel est ton score de l'indexeur X ?" |
|
||||
| `/opencloud/record/publish/1.0` | Publication `PeerRecord` vers indexeur |
|
||||
| `/opencloud/record/get/1.0` | Requête `GetPeerRecord` vers indexeur |
|
||||
| `/opencloud/native/subscribe/1.0` | Enregistrement indexeur auprès du native (+ FillRate) |
|
||||
| `/opencloud/native/unsubscribe/1.0` | Désenregistrement explicite indexeur → native |
|
||||
| `/opencloud/native/indexers/1.0` | Requête de pool d'indexeurs au native (tri par w(F)=F×(1-F)) |
|
||||
| `/opencloud/native/consensus/1.0` | Phase 1 : validation de pool d'indexeurs (vote majoritaire natifs) |
|
||||
| `/opencloud/native/peers/1.0` | Demande de pairs natifs connus (replenish mesh natif) |
|
||||
| `/opencloud/indexer/natives/1.0` | Demande d'adresses de natifs connus par un indexeur |
|
||||
| `/opencloud/indexer/consensus/1.0` | Phase 2 : liveness vote (LastSeen ≤ 120s && LastScore ≥ 30) |
|
||||
| `/opencloud/resource/search/1.0` | Recherche de ressources entre peers |
|
||||
| `/opencloud/resource/create/1.0` | Propagation création ressource vers partner |
|
||||
| `/opencloud/resource/update/1.0` | Propagation mise à jour ressource vers partner |
|
||||
| `/opencloud/resource/delete/1.0` | Propagation suppression ressource vers partner |
|
||||
| `/opencloud/resource/create/1.0` | Propagation création ressource → partner |
|
||||
| `/opencloud/resource/update/1.0` | Propagation mise à jour ressource → partner |
|
||||
| `/opencloud/resource/delete/1.0` | Propagation suppression ressource → partner |
|
||||
| `/opencloud/resource/planner/1.0` | Session planner (booking) |
|
||||
| `/opencloud/resource/verify/1.0` | Vérification signature ressource |
|
||||
| `/opencloud/resource/considers/1.0` | Transmission d'un "considers" d'exécution |
|
||||
| `/opencloud/resource/considers/1.0` | Transmission d'un considers d'exécution |
|
||||
|
||||
## Protocoles supprimés (architecture native)
|
||||
|
||||
| Protocole | Raison |
|
||||
|-----------|--------|
|
||||
| `/opencloud/native/subscribe/1.0` | Tier native supprimé |
|
||||
| `/opencloud/native/unsubscribe/1.0` | Tier native supprimé |
|
||||
| `/opencloud/native/indexers/1.0` | Remplacé par DHT FindProviders |
|
||||
| `/opencloud/native/consensus/1.0` | Remplacé par TriggerConsensus léger |
|
||||
| `/opencloud/native/peers/1.0` | Tier native supprimé |
|
||||
| `/opencloud/indexer/natives/1.0` | Tier native supprimé |
|
||||
| `/opencloud/indexer/consensus/1.0` | Remplacé par TriggerConsensus |
|
||||
| `/opencloud/resource/heartbeat/partner/1.0` | Heartbeat partner supprimé — on-demand |
|
||||
|
||||
69
docs/diagrams/connection_gater.puml
Normal file
69
docs/diagrams/connection_gater.puml
Normal file
@@ -0,0 +1,69 @@
|
||||
@startuml connection_gater
|
||||
title ConnectionGater — Vérification à l'admission (InterceptSecured)
|
||||
|
||||
participant "Remote Peer\n(inbound)" as Remote
|
||||
participant "libp2p\nhost A" as Host
|
||||
participant "OCConnectionGater" as Gater
|
||||
participant "DB (oc-lib)" as DB
|
||||
participant "Indexer X\n(joignable)" as IX
|
||||
participant "Indexer Y\n(injoignable)" as IY
|
||||
|
||||
Remote -> Host: inbound connection (post-PSK, post-TLS)
|
||||
Host -> Gater: InterceptSecured(dir=Inbound, id=RemotePeerID, conn)
|
||||
|
||||
alt dir == Outbound
|
||||
Gater --> Host: true (outbound toujours autorisé)
|
||||
end
|
||||
|
||||
== Étape 1 : Vérification base de données ==
|
||||
|
||||
Gater -> DB: NewRequestAdmin(PEER).Search(\n Filter: peer_id = RemotePeerID\n)
|
||||
DB --> Gater: []peer.Peer
|
||||
|
||||
alt trouvé AND relation == BLACKLIST
|
||||
Gater --> Host: false (refusé — blacklisté)
|
||||
Host ->x Remote: connexion fermée
|
||||
end
|
||||
|
||||
alt trouvé AND relation != BLACKLIST
|
||||
Gater --> Host: true (connu et non blacklisté)
|
||||
end
|
||||
|
||||
== Étape 2 : Vérification DHT (peer inconnu en DB) ==
|
||||
|
||||
note over Gater: Peer inconnu → vérifier qu'il existe\ndans le réseau DHT
|
||||
|
||||
Gater -> Gater: getReq = GetValue{PeerID: RemotePeerID}
|
||||
|
||||
loop Pour chaque indexeur (ordre aléatoire — Shuffle)
|
||||
|
||||
alt Indexer IY injoignable (transport error)
|
||||
Gater -> IY: h.Connect(ctxTTL, IY_AddrInfo)
|
||||
IY -->x Gater: connexion échouée
|
||||
note over Gater: reachable=false\n→ essaie le suivant
|
||||
end
|
||||
|
||||
alt Indexer IX joignable
|
||||
Gater -> IX: h.Connect(ctxTTL, IX_AddrInfo)
|
||||
IX --> Gater: OK
|
||||
Gater -> IX: TempStream /opencloud/record/get/1.0
|
||||
Gater -> IX: stream.Encode(GetValue{PeerID: RemotePeerID})
|
||||
IX -> IX: Recherche locale + DHT si absent
|
||||
IX --> Gater: GetResponse{Found: true/false, Records}
|
||||
note over Gater: reachable=true → réponse autoritaire\n(DHT distribué : un seul indexeur suffit)
|
||||
|
||||
alt Found == true
|
||||
Gater --> Host: true (pair connu du réseau)
|
||||
else Found == false
|
||||
Gater --> Host: false (refusé — inconnu du réseau)
|
||||
Host ->x Remote: connexion fermée
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
alt Aucun indexeur joignable
|
||||
note over Gater: Réseau naissant ou tous isolés.\nAutorisation par défaut.
|
||||
Gater --> Host: true
|
||||
end
|
||||
|
||||
@enduml
|
||||
56
docs/diagrams/dht_discovery.puml
Normal file
56
docs/diagrams/dht_discovery.puml
Normal file
@@ -0,0 +1,56 @@
|
||||
@startuml dht_discovery
|
||||
title Découverte DHT : Provide/FindProviders + SelectByFillRate + dhtCache indexeur
|
||||
|
||||
participant "Indexer A\n(nouveau)" as IA
|
||||
participant "DHT Network" as DHT
|
||||
participant "Node B\n(bootstrap)" as NodeB
|
||||
participant "Indexer A\n(existant)" as IAexist
|
||||
|
||||
== Inscription indexeur dans la DHT ==
|
||||
|
||||
note over IA: Démarrage IndexerService\nstartDHTProvide(fillRateFn)
|
||||
|
||||
IA -> IA: Attend adresse routable (max 60s)\nnon-loopback disponible
|
||||
|
||||
IA -> DHT: DHT.Bootstrap(ctx)\n→ routing table warmup
|
||||
|
||||
loop ticker RecommendedHeartbeatInterval (~20s)
|
||||
IA -> DHT: DHT.Provide(IndexerCID, true)\n← IndexerCID = CID(sha256("/opencloud/indexers"))
|
||||
note over DHT: L'indexeur est annoncé comme provider.\nTTL géré par libp2p-kad-dht.\nAuto-expire si Provide() s'arrête.
|
||||
end
|
||||
|
||||
== Cache DHT passif de l'indexeur ==
|
||||
|
||||
note over IA: startDHTCacheRefresh()\ngoroutine arrière-plan
|
||||
|
||||
IA -> IA: Initial delay 30s (routing table warmup)
|
||||
|
||||
loop ticker 2min
|
||||
IA -> DHT: DiscoverIndexersFromDHT(h, dht, 30)\n← FindProviders(IndexerCID, max=30)
|
||||
DHT --> IA: []AddrInfo (jusqu'à 30 candidats)
|
||||
IA -> IA: Filtre self\nSelectByFillRate(filtered, nil, 10)\n→ diversité /24, prior f=0.5 (fill rates inconnus)
|
||||
IA -> IA: dhtCache = selected (max 10)\n→ utilisé pour Suggestions dans BuildHeartbeatResponse
|
||||
end
|
||||
|
||||
== Découverte côté Node au bootstrap ==
|
||||
|
||||
NodeB -> NodeB: ConnectToIndexers → seeds ajoutés\nSendHeartbeat démarré
|
||||
|
||||
NodeB -> NodeB: goroutine proactive (après 5s warmup)
|
||||
|
||||
alt discoveryDHT == nil (node pur, pas d'IndexerService)
|
||||
NodeB -> DHT: initNodeDHT(h, seeds)\n← DHT client mode, bootstrappé sur seeds
|
||||
end
|
||||
|
||||
NodeB -> DHT: DiscoverIndexersFromDHT(h, discoveryDHT, need+extra)
|
||||
DHT --> NodeB: []AddrInfo candidats
|
||||
|
||||
NodeB -> NodeB: Filtre self\nSelectByFillRate(candidates, fillRates, need)\n→ pondération w(F) = F×(1-F)\n F=0.2 → w=0.16 (très probable)\n F=0.5 → w=0.25 (max)\n F=0.8 → w=0.16 (peu probable)\n→ filtre diversité /24
|
||||
|
||||
loop Pour chaque candidat retenu
|
||||
NodeB -> NodeB: Indexers.SetAddr(key, &addrInfo)\nNudgeIt() → heartbeat immédiat
|
||||
end
|
||||
|
||||
note over NodeB: Pool enrichi au-delà des seeds.\nScoring commence au premier heartbeat.\nSeeds restent IsSeed=true (stickiness).
|
||||
|
||||
@enduml
|
||||
41
docs/diagrams/hb_failure_evict.puml
Normal file
41
docs/diagrams/hb_failure_evict.puml
Normal file
@@ -0,0 +1,41 @@
|
||||
@startuml hb_failure_evict
|
||||
title HeartbeatFailure → evictPeer → TriggerConsensus or DHT replenish
|
||||
|
||||
participant "Node A" as NodeA
|
||||
participant "Indexer X\n(défaillant)" as IX
|
||||
participant "Indexer Y\n(voter)" as IY
|
||||
participant "Indexer Z\n(voter)" as IZ
|
||||
participant "DHT" as DHT
|
||||
participant "Indexer NEW\n(candidat)" as INEW
|
||||
|
||||
note over NodeA: SendHeartbeat tick — Indexer X in the pool
|
||||
|
||||
NodeA -> IX: stream.Encode(Heartbeat{...})
|
||||
IX -->x NodeA: timeout / transport error
|
||||
|
||||
NodeA -> NodeA: HeartbeatFailure(h, proto, dir, addr_X, info_X, isIndexerHB=true, maxPool)
|
||||
|
||||
NodeA -> NodeA: evictPeer(dir, addr_X, id_X, proto)\n→ Streams.Delete(proto, &id_X)\n→ DeleteAddr(addr_X)\n→ DeleteScore(addr_X)\n→ voters = remaining AddrInfos
|
||||
|
||||
NodeA -> NodeA: poolSize = len(dir.GetAddrs())
|
||||
|
||||
alt poolSize == 0
|
||||
NodeA -> NodeA: reconnectToSeeds()\n→ re-injects IndexerAddresses (IsSeed=true)
|
||||
alt seeds added
|
||||
NodeA -> NodeA: need = maxPool\nNudgeIt() → immediate tick
|
||||
else no seed configured or seeds unreachable
|
||||
NodeA -> NodeA: go retryUntilSeedResponds()\n(backoff 10s→5min, panic if IndexerAddresses empty)
|
||||
end
|
||||
else poolSize > 0 AND len(voters) > 0
|
||||
NodeA -> NodeA: go TriggerConsensus(h, voters, need)
|
||||
NodeA -> IY: stream GET → GetValue{Key: candidate_DID}
|
||||
IY --> NodeA: GetResponse{Found, Records}
|
||||
NodeA -> IZ: stream GET → GetValue{Key: candidate_DID}
|
||||
IZ --> NodeA: GetResponse{Found, Records}
|
||||
note over NodeA: Quorum check:\nfound=true AND lastSeen ≤ 2×interval\nAND lastScore ≥ 30\n→ majority → admit INEW
|
||||
NodeA -> NodeA: Indexers.SetAddr(addr_NEW, &INEW_AddrInfo)\nIndexers.SetScore(addr_NEW, Score{IsSeed:false})\nNudgeIt()
|
||||
else poolSize > 0 AND len(voters) == 0
|
||||
NodeA -> DHT: go replenishIndexersFromDHT(h, need)\nDiscoverIndexersFromDHT → SelectByFillRate\n→ add to Indexers Directory
|
||||
end
|
||||
|
||||
@enduml
|
||||
46
docs/diagrams/hb_last_indexer.puml
Normal file
46
docs/diagrams/hb_last_indexer.puml
Normal file
@@ -0,0 +1,46 @@
|
||||
@startuml hb_last_indexer
|
||||
title Last-indexer protection → reconnectToSeeds → retryUntilSeedResponds
|
||||
|
||||
participant "Node A" as NodeA
|
||||
participant "Indexer LAST\n(seul restant)" as IL
|
||||
participant "Seed Indexer\n(config)" as SEED
|
||||
participant "DHT" as DHT
|
||||
|
||||
note over NodeA: Pool = 1 indexer (LAST)\nIsSeed=false, low score for a long time
|
||||
|
||||
== Score-based eviction attempt ==
|
||||
NodeA -> NodeA: score < minScore\nAND TotalOnline ≥ 2×interval\nAND !IsSeed\nAND len(pool) > 1 ← FALSE: pool == 1
|
||||
|
||||
note over NodeA: Guard active: len(pool) == 1\n→ score-based eviction BLOCKED\nLAST stays in the pool
|
||||
|
||||
== Network failure (heartbeat fail) ==
|
||||
NodeA -> IL: stream.Encode(Heartbeat{...})
|
||||
IL -->x NodeA: timeout
|
||||
|
||||
NodeA -> NodeA: HeartbeatFailure → evictPeer(LAST)\npoolSize = 0
|
||||
|
||||
NodeA -> NodeA: reconnectToSeeds()\n→ parse IndexerAddresses (conf)\n→ SetAddr + SetScore(IsSeed=true) for each seed
|
||||
|
||||
alt seeds added (IndexerAddresses not empty)
|
||||
NodeA -> NodeA: NudgeIt() → immediate tick
|
||||
NodeA -> SEED: Heartbeat{...} (via SendHeartbeat nudge)
|
||||
SEED --> NodeA: HeartbeatResponse{fillRate, ...}
|
||||
note over NodeA: Pool restored via seeds.\nDHT proactive discovery resumes.
|
||||
|
||||
else IndexerAddresses empty
|
||||
NodeA -> NodeA: go retryUntilSeedResponds()
|
||||
note over NodeA: immediate panic:\n"pool is empty and no seed indexers configured"\n→ process stops
|
||||
end
|
||||
|
||||
== retryUntilSeedResponds (if seeds unresponsive) ==
|
||||
loop exponential backoff (10s → 20s → ... → 5min)
|
||||
NodeA -> NodeA: time.Sleep(backoff)
|
||||
NodeA -> NodeA: len(Indexers.GetAddrs()) > 0?\n→ yes: return (someone refilled the pool)
|
||||
NodeA -> NodeA: reconnectToSeeds()
|
||||
alt pool > 0 after reconnect
|
||||
NodeA -> NodeA: NudgeIt()\nDHT.Bootstrap(ctx, 15s)
|
||||
note over NodeA: Exit the loop.\nNormal heartbeat resumes.
|
||||
end
|
||||
end
|
||||
|
||||
@enduml
|
||||
Reference in New Issue
Block a user