Discovery Nano the light version.
This commit is contained in:
@@ -92,10 +92,6 @@ type SearchQuery struct {
|
||||
// SearchPeerResult is sent by a responding indexer to the emitting indexer
|
||||
// via ProtocolSearchPeerResponse, and forwarded by the emitting indexer to
|
||||
// the node on the open ProtocolSearchPeer stream.
|
||||
type SearchPeerResult struct {
|
||||
QueryID string `json:"query_id"`
|
||||
Records []SearchHit `json:"records"`
|
||||
}
|
||||
|
||||
// SearchHit is a single peer found during distributed search.
|
||||
type SearchHit struct {
|
||||
|
||||
@@ -203,6 +203,9 @@ func waitResults[T interface{}](topic *pubsub.Topic, s *LongLivedPubSubService,
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// timeout hit, no message before deadline: kill subscription.
|
||||
s.PubsubMu.Lock()
|
||||
if s.LongLivedPubSubs[proto] != nil {
|
||||
s.LongLivedPubSubs[proto].Close()
|
||||
}
|
||||
delete(s.LongLivedPubSubs, proto)
|
||||
s.PubsubMu.Unlock()
|
||||
return
|
||||
@@ -214,6 +217,5 @@ func waitResults[T interface{}](topic *pubsub.Topic, s *LongLivedPubSubService,
|
||||
continue
|
||||
}
|
||||
f(ctx, evt, fmt.Sprintf("%v", proto))
|
||||
fmt.Println("DEADLOCK ?")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,6 +101,10 @@ func (ix *LongLivedStreamRecordedService[T]) gc() {
|
||||
evicted = append(evicted, gcEntry{pid, name, did})
|
||||
for _, sstreams := range ix.StreamRecords {
|
||||
if sstreams[pid] != nil {
|
||||
if sstreams[pid].HeartbeatStream != nil && sstreams[pid].HeartbeatStream.Stream != nil {
|
||||
sstreams[pid].HeartbeatStream.Stream.Close()
|
||||
}
|
||||
|
||||
delete(sstreams, pid)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,12 +184,15 @@ func TempStream(h host.Host, ad pp.AddrInfo, proto protocol.ID, did string, stre
|
||||
}
|
||||
ctxTTL, cancelTTL := context.WithTimeout(context.Background(), expiry)
|
||||
defer cancelTTL()
|
||||
|
||||
if h.Network().Connectedness(ad.ID) != network.Connected {
|
||||
if err := h.Connect(ctxTTL, ad); err != nil {
|
||||
fmt.Println("Connectedness", ad.ID, err)
|
||||
|
||||
return streams, err
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println("PROTO", streams[proto])
|
||||
if streams[proto] != nil && streams[proto][ad.ID] != nil {
|
||||
return streams, nil
|
||||
} else if s, err := h.NewStream(ctxTTL, ad.ID, proto); err == nil {
|
||||
@@ -200,6 +203,9 @@ func TempStream(h host.Host, ad pp.AddrInfo, proto protocol.ID, did string, stre
|
||||
mu.Unlock()
|
||||
time.AfterFunc(expiry, func() {
|
||||
mu.Lock()
|
||||
if streams[proto] != nil && streams[proto][ad.ID] != nil && streams[proto][ad.ID].Stream != nil {
|
||||
streams[proto][ad.ID].Stream.Close()
|
||||
}
|
||||
delete(streams[proto], ad.ID)
|
||||
mu.Unlock()
|
||||
})
|
||||
@@ -212,6 +218,7 @@ func TempStream(h host.Host, ad pp.AddrInfo, proto protocol.ID, did string, stre
|
||||
mu.Unlock()
|
||||
return streams, nil
|
||||
} else {
|
||||
fmt.Println("ERRER", err)
|
||||
return streams, err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,10 +33,12 @@ const maxTTLSeconds = 86400 // 24h
|
||||
const tombstoneTTL = 10 * time.Minute
|
||||
|
||||
type PeerRecordPayload struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
DID string `json:"did"`
|
||||
PubKey []byte `json:"pub_key"`
|
||||
PubKey []byte `json:"public_key"`
|
||||
ExpiryDate time.Time `json:"expiry_date"`
|
||||
IsNano bool `json:"is_nano"`
|
||||
// TTLSeconds is the publisher's declared lifetime for this record in seconds.
|
||||
// 0 means "use the default (120 s)". Included in the signed payload so it
|
||||
// cannot be altered by an intermediary.
|
||||
@@ -45,6 +47,8 @@ type PeerRecordPayload struct {
|
||||
|
||||
type PeerRecord struct {
|
||||
PeerRecordPayload
|
||||
CreationDate time.Time `json:"creation_date"`
|
||||
UpdateDate time.Time `json:"update_date"`
|
||||
PeerID string `json:"peer_id"`
|
||||
APIUrl string `json:"api_url"`
|
||||
StreamAddress string `json:"stream_address"`
|
||||
@@ -184,7 +188,7 @@ func (ix *IndexerService) isPeerKnown(pid lpp.ID) bool {
|
||||
And: map[string][]dbs.Filter{
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: pid.String()}},
|
||||
},
|
||||
}, pid.String(), false)
|
||||
}, pid.String(), false, 0, 1)
|
||||
for _, item := range results.Data {
|
||||
p, ok := item.(*pp.Peer)
|
||||
if !ok || p.PeerID != pid.String() {
|
||||
|
||||
@@ -3,6 +3,7 @@ package indexer
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -10,8 +11,8 @@ import (
|
||||
"oc-discovery/daemons/node/common"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
"github.com/libp2p/go-libp2p/core/network"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
const TopicSearchPeer = "oc-search-peer"
|
||||
@@ -46,31 +47,34 @@ func (ix *IndexerService) updateReferent(pid pp.ID, rec PeerRecord, referent boo
|
||||
|
||||
// searchReferenced looks up nodes in referencedNodes matching the query.
|
||||
// Matches on peerID (exact), DID (exact), or name (case-insensitive contains).
|
||||
func (ix *IndexerService) searchReferenced(peerID, did, name string) []common.SearchHit {
|
||||
func (ix *IndexerService) searchReferenced(peerID, did, name string) []PeerRecord {
|
||||
ix.referencedNodesMu.RLock()
|
||||
defer ix.referencedNodesMu.RUnlock()
|
||||
nameLow := strings.ToLower(name)
|
||||
var hits []common.SearchHit
|
||||
var hits []PeerRecord
|
||||
for pid, rec := range ix.referencedNodes {
|
||||
pidStr := pid.String()
|
||||
matchPeerID := peerID != "" && pidStr == peerID
|
||||
matchDID := did != "" && rec.DID == did
|
||||
matchName := name != "" && strings.Contains(strings.ToLower(rec.Name), nameLow)
|
||||
if matchPeerID || matchDID || matchName {
|
||||
hits = append(hits, common.SearchHit{
|
||||
PeerID: pidStr,
|
||||
DID: rec.DID,
|
||||
Name: rec.Name,
|
||||
})
|
||||
rec.ID = rec.DID
|
||||
hits = append(hits, rec)
|
||||
}
|
||||
}
|
||||
return hits
|
||||
}
|
||||
|
||||
type SearchPeerResult struct {
|
||||
QueryID string `json:"query_id"`
|
||||
Records []PeerRecord `json:"records"`
|
||||
}
|
||||
|
||||
// handleSearchPeer is the ProtocolSearchPeer handler.
|
||||
// The node opens this stream, sends a SearchPeerRequest, and reads results
|
||||
// as they stream in. The stream stays open until timeout or node closes it.
|
||||
func (ix *IndexerService) handleSearchPeer(s network.Stream) {
|
||||
fmt.Println("handleSearchPeer")
|
||||
logger := oclib.GetLogger()
|
||||
defer s.Reset()
|
||||
|
||||
@@ -78,7 +82,7 @@ func (ix *IndexerService) handleSearchPeer(s network.Stream) {
|
||||
logger.Warn().Str("peer", s.Conn().RemotePeer().String()).Msg("[search] unknown peer, rejecting stream")
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Println("SearchPeerRequest")
|
||||
var req common.SearchPeerRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil || req.QueryID == "" {
|
||||
return
|
||||
@@ -94,7 +98,7 @@ func (ix *IndexerService) handleSearchPeer(s network.Stream) {
|
||||
}()
|
||||
defer streamCancel()
|
||||
|
||||
resultCh := make(chan []common.SearchHit, 16)
|
||||
resultCh := make(chan []PeerRecord, 16)
|
||||
ix.pendingSearchesMu.Lock()
|
||||
ix.pendingSearches[req.QueryID] = resultCh
|
||||
ix.pendingSearchesMu.Unlock()
|
||||
@@ -106,9 +110,10 @@ func (ix *IndexerService) handleSearchPeer(s network.Stream) {
|
||||
|
||||
// Check own referencedNodes immediately.
|
||||
if hits := ix.searchReferenced(req.PeerID, req.DID, req.Name); len(hits) > 0 {
|
||||
fmt.Println("hits", hits)
|
||||
resultCh <- hits
|
||||
}
|
||||
|
||||
fmt.Println("publishSearchQuery")
|
||||
// Broadcast search on GossipSub so other indexers can respond.
|
||||
ix.publishSearchQuery(req.QueryID, req.PeerID, req.DID, req.Name)
|
||||
|
||||
@@ -119,7 +124,8 @@ func (ix *IndexerService) handleSearchPeer(s network.Stream) {
|
||||
for {
|
||||
select {
|
||||
case hits := <-resultCh:
|
||||
if err := enc.Encode(common.SearchPeerResult{QueryID: req.QueryID, Records: hits}); err != nil {
|
||||
fmt.Println("resultCh hits", hits)
|
||||
if err := enc.Encode(SearchPeerResult{QueryID: req.QueryID, Records: hits}); err != nil {
|
||||
logger.Debug().Err(err).Msg("[search] stream write failed")
|
||||
return
|
||||
}
|
||||
@@ -145,13 +151,15 @@ func (ix *IndexerService) handleSearchPeer(s network.Stream) {
|
||||
// Another indexer opens this stream to deliver hits for a pending queryID.
|
||||
func (ix *IndexerService) handleSearchPeerResponse(s network.Stream) {
|
||||
defer s.Reset()
|
||||
var result common.SearchPeerResult
|
||||
fmt.Println("RECEIVED SEARCH")
|
||||
var result SearchPeerResult
|
||||
if err := json.NewDecoder(s).Decode(&result); err != nil || result.QueryID == "" {
|
||||
return
|
||||
}
|
||||
ix.pendingSearchesMu.Lock()
|
||||
ch := ix.pendingSearches[result.QueryID]
|
||||
ix.pendingSearchesMu.Unlock()
|
||||
fmt.Println("RECEIVED", result.QueryID, ix.pendingSearches[result.QueryID])
|
||||
if ch != nil {
|
||||
select {
|
||||
case ch <- result.Records:
|
||||
@@ -213,21 +221,28 @@ func (ix *IndexerService) onSearchQuery(q common.SearchQuery) {
|
||||
if q.EmitterID == ix.Host.ID().String() {
|
||||
return
|
||||
}
|
||||
fmt.Println("ON SEARCH QUERY")
|
||||
hits := ix.searchReferenced(q.PeerID, q.DID, q.Name)
|
||||
fmt.Println("ON SEARCH QUERY HITS", hits)
|
||||
if len(hits) == 0 {
|
||||
return
|
||||
}
|
||||
emitterID, err := pp.Decode(q.EmitterID)
|
||||
if err != nil {
|
||||
fmt.Println("ON SEARCH QUERY err DECODE", err)
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
s, err := ix.Host.NewStream(ctx, emitterID, common.ProtocolSearchPeerResponse)
|
||||
if err != nil {
|
||||
fmt.Println("ON SEARCH QUERY err NewStream", emitterID, err)
|
||||
return
|
||||
}
|
||||
defer s.Reset()
|
||||
fmt.Println("ON ", emitterID)
|
||||
defer s.Close()
|
||||
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||
json.NewEncoder(s).Encode(common.SearchPeerResult{QueryID: q.QueryID, Records: hits})
|
||||
err = json.NewEncoder(s).Encode(SearchPeerResult{QueryID: q.QueryID, Records: hits})
|
||||
fmt.Println("SEARCH ERR", err)
|
||||
s.CloseWrite()
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ type IndexerService struct {
|
||||
referencedNodes map[pp.ID]PeerRecord
|
||||
referencedNodesMu sync.RWMutex
|
||||
// pendingSearches maps queryID → result channel for in-flight searches.
|
||||
pendingSearches map[string]chan []common.SearchHit
|
||||
pendingSearches map[string]chan []PeerRecord
|
||||
pendingSearchesMu sync.Mutex
|
||||
// behavior tracks per-node compliance (heartbeat rate, publish/get volume,
|
||||
// identity consistency, signature failures).
|
||||
@@ -91,7 +91,7 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
||||
LongLivedStreamRecordedService: common.NewStreamRecordedService[PeerRecord](h, maxNode),
|
||||
isStrictIndexer: ps == nil,
|
||||
referencedNodes: map[pp.ID]PeerRecord{},
|
||||
pendingSearches: map[string]chan []common.SearchHit{},
|
||||
pendingSearches: map[string]chan []PeerRecord{},
|
||||
behavior: newNodeBehaviorTracker(),
|
||||
deletedDIDs: make(map[string]time.Time),
|
||||
eventQueue: &common.MembershipEventQueue{},
|
||||
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"oc-discovery/daemons/node/common"
|
||||
"oc-discovery/daemons/node/indexer"
|
||||
"oc-discovery/daemons/node/stream"
|
||||
"slices"
|
||||
|
||||
@@ -29,6 +29,11 @@ func ListenNATS(n *Node) {
|
||||
tools.PEER_BEHAVIOR_EVENT: func(resp tools.NATSResponse) { //nolint:typecheck
|
||||
handlePeerBehaviorEvent(n, resp)
|
||||
},
|
||||
// PEER_OBSERVE_EVENT is sent by oc-peer to start or stop observations
|
||||
// for a list of peer IDs, or to trigger a close-all.
|
||||
tools.PEER_OBSERVE_EVENT: func(resp tools.NATSResponse) {
|
||||
n.StreamService.HandleObserveNATSCommand(resp)
|
||||
},
|
||||
tools.PROPALGATION_EVENT: func(resp tools.NATSResponse) {
|
||||
if resp.FromApp == config.GetAppName() {
|
||||
return
|
||||
@@ -134,6 +139,21 @@ func ListenNATS(n *Node) {
|
||||
}
|
||||
n.StreamService.Mu.Unlock()
|
||||
}
|
||||
case tools.PB_OBSERVE:
|
||||
print("PROPALGATE OBSERVE")
|
||||
handleObserveEvent(n, propalgation)
|
||||
case tools.PB_OBSERVE_CLOSE:
|
||||
print("PROPALGATE CLOSE")
|
||||
handleObserveCloseEvent(n, propalgation)
|
||||
case tools.PB_PROPAGATE:
|
||||
// Another oc-discovery forwarded a heartbeat batch.
|
||||
// Re-emit on PEER_OBSERVE_RESPONSE_EVENT so the local oc-peer sees it.
|
||||
tools.NewNATSCaller().SetNATSPub(tools.PEER_OBSERVE_RESPONSE_EVENT, tools.NATSResponse{
|
||||
FromApp: resp.FromApp,
|
||||
Datatype: tools.PEER,
|
||||
Method: int(tools.PEER_OBSERVE_RESPONSE_EVENT),
|
||||
Payload: propalgation.Payload,
|
||||
})
|
||||
case tools.PB_CLOSE_SEARCH:
|
||||
if propalgation.DataType == int(tools.PEER) {
|
||||
n.peerSearches.Cancel(resp.User)
|
||||
@@ -141,16 +161,18 @@ func ListenNATS(n *Node) {
|
||||
n.StreamService.ResourceSearches.Cancel(resp.User)
|
||||
}
|
||||
case tools.PB_SEARCH:
|
||||
fmt.Println("PROPALGATE PEER")
|
||||
if propalgation.DataType == int(tools.PEER) {
|
||||
m := map[string]interface{}{}
|
||||
if err := json.Unmarshal(propalgation.Payload, &m); err == nil {
|
||||
needle := fmt.Sprintf("%v", m["search"])
|
||||
userKey := resp.User
|
||||
go n.SearchPeerRecord(userKey, needle, func(hit common.SearchHit) {
|
||||
go n.SearchPeerRecord(userKey, needle, func(hit indexer.PeerRecord) {
|
||||
if b, err := json.Marshal(hit); err == nil {
|
||||
tools.NewNATSCaller().SetNATSPub(tools.SEARCH_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-discovery",
|
||||
Datatype: tools.DataType(tools.PEER),
|
||||
User: userKey,
|
||||
Method: int(tools.SEARCH_EVENT),
|
||||
Payload: b,
|
||||
})
|
||||
@@ -240,3 +262,37 @@ func handlePeerBehaviorEvent(n *Node, resp tools.NATSResponse) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// handleObserveEvent processes a PB_OBSERVE PropalgationMessage from another
|
||||
// oc-discovery node, starting observation for the listed peers.
|
||||
func handleObserveEvent(n *Node, p tools.PropalgationMessage) {
|
||||
var cmd stream.ObserveCommand
|
||||
if err := json.Unmarshal(p.Payload, &cmd); err != nil {
|
||||
fmt.Println("handleObserveEvent: unmarshal error:", err)
|
||||
return
|
||||
}
|
||||
for _, sp := range cmd.Peers {
|
||||
if err := n.StreamService.OpenObserveStream(sp); err != nil {
|
||||
fmt.Println("handleObserveEvent: OpenObserveStream failed for", sp.PeerID, ":", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleObserveCloseEvent processes a PB_OBSERVE_CLOSE PropalgationMessage from
|
||||
// another oc-discovery node, stopping observation for the listed peer IDs.
|
||||
func handleObserveCloseEvent(n *Node, p tools.PropalgationMessage) {
|
||||
var cmd stream.ObserveCommand
|
||||
if err := json.Unmarshal(p.Payload, &cmd); err != nil {
|
||||
fmt.Println("handleObserveCloseEvent: unmarshal error:", err)
|
||||
return
|
||||
}
|
||||
if cmd.CloseAll {
|
||||
n.StreamService.CloseAllObserves()
|
||||
return
|
||||
}
|
||||
for _, peerID := range cmd.PeerIDs {
|
||||
if err := n.StreamService.CloseObserveStream(peerID); err != nil {
|
||||
fmt.Println("handleObserveCloseEvent: CloseObserveStream failed for", peerID, ":", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,6 +113,7 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
|
||||
if ttl <= 0 {
|
||||
ttl = indexer.DefaultTTLSeconds * time.Second
|
||||
}
|
||||
fresh.UpdateDate = time.Now().UTC()
|
||||
fresh.PeerRecordPayload.ExpiryDate = time.Now().UTC().Add(ttl)
|
||||
payload, _ := json.Marshal(fresh.PeerRecordPayload)
|
||||
fresh.Signature, err = priv.Sign(payload)
|
||||
@@ -141,7 +142,7 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
|
||||
And: map[string][]dbs.Filter{
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: pid.String()}},
|
||||
},
|
||||
}, pid.String(), false)
|
||||
}, pid.String(), false, 0, 1)
|
||||
for _, item := range results.Data {
|
||||
p, ok := item.(*peer.Peer)
|
||||
if !ok || p.PeerID != pid.String() {
|
||||
@@ -228,7 +229,7 @@ func (d *Node) isPeerKnown(pid pp.ID) bool {
|
||||
And: map[string][]dbs.Filter{
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: pid.String()}},
|
||||
},
|
||||
}, pid.String(), false)
|
||||
}, pid.String(), false, 0, 1)
|
||||
for _, item := range results.Data {
|
||||
p, ok := item.(*peer.Peer)
|
||||
if !ok || p.PeerID != pid.String() {
|
||||
@@ -267,15 +268,8 @@ func (d *Node) publishPeerRecord(
|
||||
if ttl <= 0 {
|
||||
ttl = indexer.DefaultTTLSeconds * time.Second
|
||||
}
|
||||
base := indexer.PeerRecordPayload{
|
||||
Name: rec.Name,
|
||||
DID: rec.DID,
|
||||
PubKey: rec.PubKey,
|
||||
TTLSeconds: rec.TTLSeconds,
|
||||
ExpiryDate: time.Now().UTC().Add(ttl),
|
||||
}
|
||||
payload, _ := json.Marshal(base)
|
||||
rec.PeerRecordPayload = base
|
||||
rec.ExpiryDate = time.Now().UTC().Add(ttl)
|
||||
payload, _ := json.Marshal(rec.PeerRecordPayload)
|
||||
rec.Signature, err = priv.Sign(payload)
|
||||
if err := json.NewEncoder(stream.Stream).Encode(&rec); err != nil { // then publish on stream
|
||||
return err
|
||||
@@ -288,7 +282,7 @@ func (d *Node) publishPeerRecord(
|
||||
// A new call for the same userKey cancels any previous search.
|
||||
// Results are pushed to onResult as they arrive; the function returns when
|
||||
// the stream closes (idle timeout, explicit cancel, or indexer unreachable).
|
||||
func (d *Node) SearchPeerRecord(userKey, needle string, onResult func(common.SearchHit)) {
|
||||
func (d *Node) SearchPeerRecord(userKey, needle string, onResult func(indexer.PeerRecord)) {
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
idleTimeout := common.SearchIdleTimeout()
|
||||
@@ -306,7 +300,7 @@ func (d *Node) SearchPeerRecord(userKey, needle string, onResult func(common.Sea
|
||||
} else {
|
||||
req.Name = needle
|
||||
}
|
||||
|
||||
fmt.Println("PROPALGATE PEER", needle, common.Indexers.GetAddrs())
|
||||
for _, ad := range common.Indexers.GetAddrs() {
|
||||
if ad.Info == nil {
|
||||
continue
|
||||
@@ -330,7 +324,7 @@ func (d *Node) SearchPeerRecord(userKey, needle string, onResult func(common.Sea
|
||||
seen := map[string]struct{}{}
|
||||
dec := json.NewDecoder(s)
|
||||
for {
|
||||
var result common.SearchPeerResult
|
||||
var result indexer.SearchPeerResult
|
||||
if err := dec.Decode(&result); err != nil {
|
||||
break
|
||||
}
|
||||
@@ -416,7 +410,7 @@ func (d *Node) claimInfo(
|
||||
And: map[string][]dbs.Filter{ // search by name if no filters are provided
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: d.Host.ID().String()}},
|
||||
},
|
||||
}, "", false)
|
||||
}, "", false, 0, 1)
|
||||
if len(peers.Data) > 0 {
|
||||
did = peers.Data[0].GetID() // if already existing set up did as made
|
||||
}
|
||||
@@ -435,9 +429,11 @@ func (d *Node) claimInfo(
|
||||
|
||||
now := time.Now().UTC()
|
||||
pRec := indexer.PeerRecordPayload{
|
||||
Name: name,
|
||||
DID: did, // REAL PEER ID
|
||||
PubKey: pubBytes,
|
||||
Name: name,
|
||||
DID: did, // REAL PEER ID
|
||||
PubKey: pubBytes,
|
||||
|
||||
IsNano: oclib.GetConfig().IsNano,
|
||||
TTLSeconds: indexer.DefaultTTLSeconds,
|
||||
ExpiryDate: now.Add(indexer.DefaultTTLSeconds * time.Second),
|
||||
}
|
||||
@@ -447,6 +443,8 @@ func (d *Node) claimInfo(
|
||||
rec := &indexer.PeerRecord{
|
||||
PeerRecordPayload: pRec,
|
||||
}
|
||||
rec.CreationDate = time.Now().UTC()
|
||||
rec.UpdateDate = time.Now().UTC()
|
||||
rec.Signature, err = priv.Sign(payload)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -27,8 +27,9 @@ func (ps *PubSubService) SearchPublishEvent(
|
||||
return ps.StreamService.PublishesCommon(dt, user, groups, nil, b, stream.ProtocolSearchResource) //if partners focus only them*/
|
||||
case "partner": // define Search Strategy
|
||||
return ps.StreamService.PublishesCommon(dt, user, groups, &dbs.Filters{ // filter by like name, short_description, description, owner, url if no filters are provided
|
||||
And: map[string][]dbs.Filter{
|
||||
Or: map[string][]dbs.Filter{
|
||||
"relation": {{Operator: dbs.EQUAL.String(), Value: peer.PARTNER}},
|
||||
"is_nano": {{Operator: dbs.EQUAL.String(), Value: true}},
|
||||
},
|
||||
}, b, stream.ProtocolSearchResource)
|
||||
case "all": // Gossip PubSub
|
||||
|
||||
362
daemons/node/stream/dnt_cache.go
Normal file
362
daemons/node/stream/dnt_cache.go
Normal file
@@ -0,0 +1,362 @@
|
||||
package stream
|
||||
|
||||
// dnt_cache.go — Disconnection Network Tolerance cache for outbound stream requests.
|
||||
//
|
||||
// When a stream write fails because the remote peer is unreachable, the request
|
||||
// is saved here and retried on the next tick. Two levels are defined:
|
||||
//
|
||||
// - dntCritical : retry indefinitely (create / update / delete resource).
|
||||
// - dntModerate : up to dntMaxModerateRetries retries, then abandon.
|
||||
//
|
||||
// Pubsub messages and search streams are explicitly excluded.
|
||||
// Streams initiated from the indexer side are never enqueued here.
|
||||
//
|
||||
// # Crash-resilient persistence
|
||||
//
|
||||
// Critical entries are written to an encrypted file (AES-256-GCM) so they
|
||||
// survive a node crash/restart. The AES key is derived deterministically from
|
||||
// the node's Ed25519 private key via HKDF-SHA256 — no extra secret to manage.
|
||||
// Moderate entries are intentionally not persisted: their retry budget is small
|
||||
// enough that re-loading them after a restart would be misleading.
|
||||
|
||||
import (
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/tools"
|
||||
"golang.org/x/crypto/hkdf"
|
||||
|
||||
"oc-discovery/conf"
|
||||
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
"github.com/libp2p/go-libp2p/core/protocol"
|
||||
)
|
||||
|
||||
// dntLevel classifies how persistently a failed outbound request is retried.
type dntLevel int

const (
	// dntCritical entries are retried until the message is delivered.
	// It is the zero value of dntLevel.
	dntCritical dntLevel = iota
	// dntModerate entries are retried up to dntMaxModerateRetries times.
	dntModerate
)

const (
	// dntMaxModerateRetries is the retry budget of a dntModerate entry.
	dntMaxModerateRetries = 3
	// dntRetryInterval is the period of the ticker driving the retry loop.
	dntRetryInterval = 15 * time.Second
)
|
||||
|
||||
// dntProtocols maps each stream protocol to its DNT level.
|
||||
// Protocols absent from this map receive no caching (e.g. ProtocolSearchResource).
|
||||
var dntProtocols = map[protocol.ID]dntLevel{
|
||||
// Critical — data mutations that must eventually be delivered.
|
||||
ProtocolCreateResource: dntCritical,
|
||||
ProtocolUpdateResource: dntCritical,
|
||||
ProtocolDeleteResource: dntCritical,
|
||||
// Moderate — confirmations / config / planner: 3 retries before abandon.
|
||||
ProtocolVerifyResource: dntModerate,
|
||||
ProtocolSendPlanner: dntModerate,
|
||||
ProtocolConsidersResource: dntModerate,
|
||||
ProtocolMinioConfigResource: dntModerate,
|
||||
ProtocolAdmiraltyConfigResource: dntModerate,
|
||||
}
|
||||
|
||||
// dntEntryJSON is the on-disk representation of a dntEntry.
|
||||
// pp.AddrInfo and protocol.ID don't have built-in JSON tags so we flatten them.
|
||||
type dntEntryJSON struct {
|
||||
DID string `json:"did"`
|
||||
Addr pp.AddrInfo `json:"addr"`
|
||||
DT *tools.DataType `json:"dt,omitempty"`
|
||||
User string `json:"user"`
|
||||
Payload []byte `json:"payload"`
|
||||
Proto protocol.ID `json:"proto"`
|
||||
Retries int `json:"retries"`
|
||||
AddedAt time.Time `json:"added_at"`
|
||||
}
|
||||
|
||||
type dntEntry struct {
|
||||
did string
|
||||
addr pp.AddrInfo
|
||||
dt *tools.DataType
|
||||
user string
|
||||
payload []byte
|
||||
proto protocol.ID
|
||||
retries int
|
||||
addedAt time.Time
|
||||
}
|
||||
|
||||
func (e *dntEntry) toJSON() dntEntryJSON {
|
||||
return dntEntryJSON{
|
||||
DID: e.did,
|
||||
Addr: e.addr,
|
||||
DT: e.dt,
|
||||
User: e.user,
|
||||
Payload: e.payload,
|
||||
Proto: e.proto,
|
||||
Retries: e.retries,
|
||||
AddedAt: e.addedAt,
|
||||
}
|
||||
}
|
||||
|
||||
func entryFromJSON(j dntEntryJSON) *dntEntry {
|
||||
return &dntEntry{
|
||||
did: j.DID,
|
||||
addr: j.Addr,
|
||||
dt: j.DT,
|
||||
user: j.User,
|
||||
payload: j.Payload,
|
||||
proto: j.Proto,
|
||||
retries: j.Retries,
|
||||
addedAt: j.AddedAt,
|
||||
}
|
||||
}
|
||||
|
||||
type dntCache struct {
|
||||
mu sync.Mutex
|
||||
entries []*dntEntry
|
||||
// aesKey is the derived AES-256 key used for on-disk encryption.
|
||||
// Nil when key derivation failed: persistence is disabled but the in-memory
|
||||
// cache continues to function normally.
|
||||
aesKey []byte
|
||||
}
|
||||
|
||||
// newDNTCache initialises the cache, derives the encryption key, and restores
|
||||
// any critical entries that were persisted before the last crash.
|
||||
func newDNTCache() *dntCache {
|
||||
log := oclib.GetLogger()
|
||||
c := &dntCache{}
|
||||
key, err := deriveDNTKey()
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("[dnt] key derivation failed — persistence disabled")
|
||||
} else {
|
||||
c.aesKey = key
|
||||
c.loadFromDisk()
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// enqueue adds an entry to the cache and persists critical entries to disk.
|
||||
func (c *dntCache) enqueue(e *dntEntry) {
|
||||
c.mu.Lock()
|
||||
c.entries = append(c.entries, e)
|
||||
c.mu.Unlock()
|
||||
if dntProtocols[e.proto] == dntCritical {
|
||||
go c.persistToDisk()
|
||||
}
|
||||
}
|
||||
|
||||
// drain atomically removes and returns all current entries.
|
||||
func (c *dntCache) drain() []*dntEntry {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
out := c.entries
|
||||
c.entries = nil
|
||||
return out
|
||||
}
|
||||
|
||||
// requeue puts entries back at the head of the list, preserving any new
|
||||
// entries added while the retry loop was running.
|
||||
func (c *dntCache) requeue(entries []*dntEntry) {
|
||||
if len(entries) == 0 {
|
||||
return
|
||||
}
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.entries = append(entries, c.entries...)
|
||||
}
|
||||
|
||||
// ── Persistence ──────────────────────────────────────────────────────────────
|
||||
|
||||
// dntCachePath returns the path of the on-disk cache file, placed next to the
|
||||
// node's private key so it lives on the same persistent volume.
|
||||
func dntCachePath() string {
|
||||
return filepath.Join(filepath.Dir(conf.GetConfig().PrivateKeyPath), "dnt_cache.bin")
|
||||
}
|
||||
|
||||
// deriveDNTKey derives a 32-byte AES key from the node's Ed25519 private key
|
||||
// using HKDF-SHA256. The derivation is deterministic: the same key is always
|
||||
// produced from the same private key, so no symmetric secret needs storing.
|
||||
func deriveDNTKey() ([]byte, error) {
|
||||
priv, err := tools.LoadKeyFromFilePrivate()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Raw() on a libp2p Ed25519 private key returns the 64-byte representation
|
||||
// (32-byte seed || 32-byte public key). We use the full 64 bytes as IKM.
|
||||
raw, err := priv.Raw()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reader := hkdf.New(sha256.New, raw, nil, []byte("oc-discovery/dnt-cache/v1"))
|
||||
key := make([]byte, 32)
|
||||
if _, err := io.ReadFull(reader, key); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return key, nil
|
||||
}
|
||||
|
||||
// persistToDisk encrypts all current critical entries and writes them to disk.
|
||||
// Non-critical entries are deliberately excluded — they are not worth restoring
|
||||
// after a restart given their limited retry budget.
|
||||
func (c *dntCache) persistToDisk() {
|
||||
if c.aesKey == nil {
|
||||
return
|
||||
}
|
||||
log := oclib.GetLogger()
|
||||
c.mu.Lock()
|
||||
var toSave []dntEntryJSON
|
||||
for _, e := range c.entries {
|
||||
if dntProtocols[e.proto] == dntCritical {
|
||||
toSave = append(toSave, e.toJSON())
|
||||
}
|
||||
}
|
||||
c.mu.Unlock()
|
||||
|
||||
plaintext, err := json.Marshal(toSave)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
block, err := aes.NewCipher(c.aesKey)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
gcm, err := cipher.NewGCM(block)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
nonce := make([]byte, gcm.NonceSize())
|
||||
if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
|
||||
return
|
||||
}
|
||||
ciphertext := gcm.Seal(nonce, nonce, plaintext, nil)
|
||||
|
||||
path := dntCachePath()
|
||||
tmp := path + ".tmp"
|
||||
if err := os.WriteFile(tmp, ciphertext, 0600); err != nil {
|
||||
log.Warn().Err(err).Msg("[dnt] failed to write cache file")
|
||||
return
|
||||
}
|
||||
if err := os.Rename(tmp, path); err != nil {
|
||||
log.Warn().Err(err).Msg("[dnt] failed to rename cache file")
|
||||
_ = os.Remove(tmp)
|
||||
}
|
||||
}
|
||||
|
||||
// loadFromDisk decrypts the on-disk cache and re-enqueues only critical entries.
|
||||
// Errors (missing file, decryption failure) are non-fatal: the cache simply
|
||||
// starts empty, which is safe.
|
||||
func (c *dntCache) loadFromDisk() {
|
||||
if c.aesKey == nil {
|
||||
return
|
||||
}
|
||||
log := oclib.GetLogger()
|
||||
path := dntCachePath()
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
log.Warn().Err(err).Msg("[dnt] failed to read cache file")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
block, err := aes.NewCipher(c.aesKey)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
gcm, err := cipher.NewGCM(block)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if len(data) < gcm.NonceSize() {
|
||||
log.Warn().Msg("[dnt] cache file too short, ignoring")
|
||||
return
|
||||
}
|
||||
nonce, ciphertext := data[:gcm.NonceSize()], data[gcm.NonceSize():]
|
||||
plaintext, err := gcm.Open(nil, nonce, ciphertext, nil)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("[dnt] cache file decryption failed (key mismatch?), ignoring")
|
||||
return
|
||||
}
|
||||
|
||||
var saved []dntEntryJSON
|
||||
if err := json.Unmarshal(plaintext, &saved); err != nil {
|
||||
log.Warn().Err(err).Msg("[dnt] cache file unmarshal failed, ignoring")
|
||||
return
|
||||
}
|
||||
|
||||
count := 0
|
||||
for _, j := range saved {
|
||||
// Only restore critical entries — moderate entries are intentionally
|
||||
// not persisted, but this guard defends against format changes.
|
||||
if dntProtocols[j.Proto] != dntCritical {
|
||||
continue
|
||||
}
|
||||
c.entries = append(c.entries, entryFromJSON(j))
|
||||
count++
|
||||
}
|
||||
if count > 0 {
|
||||
log.Info().Int("count", count).Msg("[dnt] restored critical entries from disk")
|
||||
}
|
||||
}
|
||||
|
||||
// ── Retry loop ────────────────────────────────────────────────────────────────
|
||||
|
||||
// startDNTLoop runs the background retry goroutine. Call once after init.
|
||||
func (s *StreamService) startDNTLoop() {
|
||||
logger := oclib.GetLogger()
|
||||
ticker := time.NewTicker(dntRetryInterval)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
entries := s.dnt.drain()
|
||||
if len(entries) == 0 {
|
||||
continue
|
||||
}
|
||||
var keep []*dntEntry
|
||||
for _, e := range entries {
|
||||
_, err := s.write(e.did, &e.addr, e.dt, e.user, e.payload, e.proto)
|
||||
if err == nil {
|
||||
level := dntProtocols[e.proto]
|
||||
if level == dntCritical {
|
||||
logger.Info().
|
||||
Str("proto", string(e.proto)).
|
||||
Str("peer", e.did).
|
||||
Msg("[dnt] critical message delivered after retry")
|
||||
} else {
|
||||
logger.Info().
|
||||
Str("proto", string(e.proto)).
|
||||
Str("peer", e.did).
|
||||
Int("retries", e.retries).
|
||||
Msg("[dnt] moderate message delivered after retry")
|
||||
}
|
||||
continue
|
||||
}
|
||||
level := dntProtocols[e.proto]
|
||||
switch level {
|
||||
case dntCritical:
|
||||
keep = append(keep, e)
|
||||
case dntModerate:
|
||||
e.retries++
|
||||
if e.retries < dntMaxModerateRetries {
|
||||
keep = append(keep, e)
|
||||
} else {
|
||||
logger.Warn().
|
||||
Str("proto", string(e.proto)).
|
||||
Str("peer", e.did).
|
||||
Int("retries", e.retries).
|
||||
Msg("[dnt] moderate message abandoned after max retries")
|
||||
}
|
||||
}
|
||||
}
|
||||
s.dnt.requeue(keep)
|
||||
// Persist after each tick so the on-disk file reflects the current
|
||||
// state (entries delivered are removed, new ones from concurrent
|
||||
// enqueues are included).
|
||||
go s.dnt.persistToDisk()
|
||||
}
|
||||
}
|
||||
@@ -14,14 +14,23 @@ import (
|
||||
"cloud.o-forge.io/core/oc-lib/models/peer"
|
||||
"cloud.o-forge.io/core/oc-lib/models/resources"
|
||||
"cloud.o-forge.io/core/oc-lib/tools"
|
||||
"github.com/libp2p/go-libp2p/core/network"
|
||||
)
|
||||
|
||||
type Verify struct {
|
||||
IsVerified bool `json:"is_verified"`
|
||||
}
|
||||
|
||||
func (ps *StreamService) handleEvent(protocol string, evt *common.Event) error {
|
||||
fmt.Println("handleEvent")
|
||||
func (ps *StreamService) handleEvent(protocol string, evt *common.Event, s network.Stream) error {
|
||||
fmt.Println("handleEvent", protocol)
|
||||
// Heartbeat received on an outgoing ProtocolObserve stream.
|
||||
if protocol == ProtocolObserve {
|
||||
return ps.handleIncomingObserve(s)
|
||||
}
|
||||
if protocol == observeHBEventType {
|
||||
return ps.handleObserveHeartbeat(evt)
|
||||
}
|
||||
|
||||
ps.handleEventFromPartner(evt, protocol)
|
||||
/*if protocol == ProtocolVerifyResource {
|
||||
if evt.DataType == -1 {
|
||||
@@ -159,7 +168,7 @@ func (ps *StreamService) handleEventFromPartner(evt *common.Event, protocol stri
|
||||
And: map[string][]dbs.Filter{
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: evt.From}},
|
||||
},
|
||||
}, evt.From, false)
|
||||
}, evt.From, false, 0, 1)
|
||||
if len(peers.Data) > 0 {
|
||||
p := peers.Data[0].(*peer.Peer)
|
||||
ps.SendResponse(p, evt, fmt.Sprintf("%v", search))
|
||||
@@ -212,7 +221,7 @@ func (abs *StreamService) SendResponse(p *peer.Peer, event *common.Event, search
|
||||
} else {
|
||||
for _, dt := range dts {
|
||||
access := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil)
|
||||
searched := access.Search(abs.FilterPeer(self.GetID(), event.Groups, search), "", false)
|
||||
searched := access.Search(abs.FilterPeer(self.GetID(), event.Groups, search), "", false, 0, 0)
|
||||
for _, ss := range searched.Data {
|
||||
if j, err := json.Marshal(ss); err == nil {
|
||||
abs.PublishCommon(&dt, event.User, event.Groups, p.PeerID, ProtocolSearchResource, j)
|
||||
|
||||
552
daemons/node/stream/observe.go
Normal file
552
daemons/node/stream/observe.go
Normal file
@@ -0,0 +1,552 @@
|
||||
package stream
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"oc-discovery/daemons/node/common"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/dbs"
|
||||
"cloud.o-forge.io/core/oc-lib/models/peer"
|
||||
"cloud.o-forge.io/core/oc-lib/tools"
|
||||
"github.com/libp2p/go-libp2p/core/network"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
// Identifiers and timing parameters of the peer-observation feature.
const (
	// ProtocolObserve is the libp2p protocol for peer connectivity
	// observation. The requesting oc-discovery opens a stream to the remote
	// oc-discovery and sends an ObserveRequest; the remote side then emits an
	// ObserveHeartbeat event once per observeHBInterval.
	ProtocolObserve = "/opencloud/peer/observe/1.0"

	// observeHBEventType is the common.Event type carried by heartbeat
	// responses.
	observeHBEventType = "/opencloud/peer/observe/heartbeat"

	// observeHBInterval is the period between two heartbeats.
	observeHBInterval = 30 * time.Second

	// observeDrainDuration is how long incoming observe requests are refused
	// after a close-all.
	observeDrainDuration = 30 * time.Second

	// observeBatchWindow is the accumulation window before a heartbeat batch
	// is flushed to NATS. All peer heartbeats received within this window are
	// grouped into a single PEER_OBSERVE_RESPONSE_EVENT, reducing NATS
	// traffic.
	observeBatchWindow = 2 * time.Second
)
|
||||
|
||||
// ObserveRequest is the message the observing side sends on a
// ProtocolObserve stream.
type ObserveRequest struct {
	// Close asks the remote side to stop its heartbeat goroutine and forget
	// this observer (graceful teardown). It is omitted from the JSON encoding
	// when false.
	Close bool `json:"close,omitempty"`
}
|
||||
|
||||
// ObserveHeartbeat is the payload the observed side emits once per
// observeHBInterval.
type ObserveHeartbeat struct {
	// State is always "online" when the heartbeat is actively emitted.
	State string `json:"state"`
}
|
||||
|
||||
// ShallowPeer is the minimal peer representation sent by oc-peer in a
// PEER_OBSERVE_EVENT.
type ShallowPeer struct {
	// ID is the peer's identifier in the peer database.
	ID string `json:"id"`
	// PeerID is the peer's libp2p peer ID in string form.
	PeerID string `json:"peer_id"`
	// Address carries the NATS address; it is not used by oc-discovery and
	// is forwarded as-is.
	Address string `json:"address"`
	// StreamAddress lets oc-discovery connect without a DB lookup.
	StreamAddress string `json:"stream_address"`
}
|
||||
|
||||
// ObserveCommand is the payload carried by a PEER_OBSERVE_EVENT NATS message
|
||||
// (from oc-peer).
|
||||
//
|
||||
// Observe → User + Peers populated
|
||||
// Close → User + PeerIDs + Close=true
|
||||
// CloseAll → CloseAll=true (User optional)
|
||||
type ObserveCommand struct {
|
||||
User string `json:"user"`
|
||||
Peers []ShallowPeer `json:"peers,omitempty"`
|
||||
PeerIDs []string `json:"peer_ids,omitempty"`
|
||||
Close bool `json:"close,omitempty"`
|
||||
CloseAll bool `json:"close_all,omitempty"`
|
||||
}
|
||||
|
||||
// ── observe cache (observed side) ────────────────────────────────────────────
|
||||
|
||||
// observeCache maps an observing peer's libp2p PeerID string to the cancel
// function of the heartbeat goroutine serving it. It is used exclusively on
// the OBSERVED side. All methods are guarded by mu.
type observeCache struct {
	mu      sync.Mutex
	cancels map[string]context.CancelFunc
}

// newObserveCache returns an empty, ready-to-use cache.
func newObserveCache() *observeCache {
	c := new(observeCache)
	c.cancels = make(map[string]context.CancelFunc)
	return c
}

// set registers cancel for pid. A cancel function already registered for the
// same pid is invoked first, stopping the goroutine it belongs to.
func (c *observeCache) set(pid string, cancel context.CancelFunc) {
	c.mu.Lock()
	defer c.mu.Unlock()

	previous, replaced := c.cancels[pid]
	if replaced {
		previous()
	}
	c.cancels[pid] = cancel
}

// cancel invokes and forgets the cancel function registered for pid. It is a
// no-op when pid is not registered.
func (c *observeCache) cancel(pid string) {
	c.mu.Lock()
	defer c.mu.Unlock()

	stop, found := c.cancels[pid]
	if !found {
		return
	}
	stop()
	delete(c.cancels, pid)
}

// cancelAll invokes every registered cancel function and empties the cache.
func (c *observeCache) cancelAll() {
	c.mu.Lock()
	defer c.mu.Unlock()

	for pid := range c.cancels {
		c.cancels[pid]()
	}
	c.cancels = make(map[string]context.CancelFunc)
}

// delete forgets pid without invoking its cancel function.
//
// NOTE(review): the removal is unconditional. If set has replaced the entry
// for pid since the caller registered its own cancel function, the newer
// entry is the one that gets dropped.
func (c *observeCache) delete(pid string) {
	c.mu.Lock()
	delete(c.cancels, pid)
	c.mu.Unlock()
}
|
||||
|
||||
// ── heartbeat batcher (observing side) ───────────────────────────────────────
|
||||
|
||||
// heartbeatBatcher accumulates peer_ids from incoming heartbeats over
|
||||
// observeBatchWindow, then flushes them in a single NATS call.
|
||||
// Using a map as the backing store deduplicates multiple heartbeats from the
|
||||
// same peer within the same window (should not happen, but is harmless).
|
||||
type heartbeatBatcher struct {
|
||||
mu sync.Mutex
|
||||
ids map[string]struct{}
|
||||
timer *time.Timer
|
||||
flush func(peerIDs []string)
|
||||
}
|
||||
|
||||
func newHeartbeatBatcher(flush func([]string)) *heartbeatBatcher {
|
||||
return &heartbeatBatcher{
|
||||
ids: make(map[string]struct{}),
|
||||
flush: flush,
|
||||
}
|
||||
}
|
||||
|
||||
// add records peerID in the current batch and arms the flush timer if needed.
|
||||
func (b *heartbeatBatcher) add(peerID string) {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
b.ids[peerID] = struct{}{}
|
||||
if b.timer == nil {
|
||||
b.timer = time.AfterFunc(observeBatchWindow, b.fire)
|
||||
}
|
||||
}
|
||||
|
||||
// fire is called by the timer; it drains the batch and invokes flush.
|
||||
func (b *heartbeatBatcher) fire() {
|
||||
b.mu.Lock()
|
||||
ids := make([]string, 0, len(b.ids))
|
||||
for id := range b.ids {
|
||||
ids = append(ids, id)
|
||||
}
|
||||
b.ids = make(map[string]struct{})
|
||||
b.timer = nil
|
||||
b.mu.Unlock()
|
||||
if len(ids) > 0 {
|
||||
b.flush(ids)
|
||||
}
|
||||
}
|
||||
|
||||
// flushObserveBatch is the flush function wired into the heartbeatBatcher.
|
||||
// It emits two NATS messages:
|
||||
// - PEER_OBSERVE_RESPONSE_EVENT → consumed by oc-peer (direct channel)
|
||||
// - PROPALGATION_EVENT / PB_PROPAGATE → consumed by other oc-discovery nodes
|
||||
func flushObserveBatch(peerIDs []string) {
|
||||
payload, err := json.Marshal(map[string]interface{}{
|
||||
"peer_ids": peerIDs,
|
||||
"state": "online",
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Direct notification to oc-peer.
|
||||
tools.NewNATSCaller().SetNATSPub(tools.PEER_OBSERVE_RESPONSE_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-discovery",
|
||||
Datatype: tools.PEER,
|
||||
Method: int(tools.PEER_OBSERVE_RESPONSE_EVENT),
|
||||
Payload: payload,
|
||||
})
|
||||
|
||||
// Broadcast to other oc-discovery nodes so they can forward to their
|
||||
// local oc-peer if needed.
|
||||
propPayload, err := json.Marshal(tools.PropalgationMessage{
|
||||
DataType: int(tools.PEER),
|
||||
Action: tools.PB_PROPAGATE,
|
||||
Payload: payload,
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-discovery",
|
||||
Datatype: tools.PEER,
|
||||
Method: int(tools.PROPALGATION_EVENT),
|
||||
Payload: propPayload,
|
||||
})
|
||||
}
|
||||
|
||||
// ── incoming observe handler (observed side) ──────────────────────────────────
|
||||
|
||||
// handleIncomingObserve is registered as the ProtocolObserve stream handler.
|
||||
// It is called when a remote peer opens an observe stream to us.
|
||||
// The function reads the request, validates it, then starts (or stops) the
|
||||
// heartbeat goroutine and returns immediately — the goroutine owns the stream.
|
||||
func (s *StreamService) handleIncomingObserve(rawStream network.Stream) error {
|
||||
remotePeerID := rawStream.Conn().RemotePeer().String()
|
||||
addr := rawStream.Conn().RemoteMultiaddr().String()
|
||||
ad, err := pp.AddrInfoFromString(addr + "/p2p/" + remotePeerID)
|
||||
if err != nil {
|
||||
fmt.Println("qndlqnl EERR", addr, err)
|
||||
return err
|
||||
}
|
||||
log := oclib.GetLogger()
|
||||
|
||||
// Drain mode: reject any new observations for 30 s after a close-all.
|
||||
s.drainMu.RLock()
|
||||
draining := !s.drainUntil.IsZero() && time.Now().Before(s.drainUntil)
|
||||
s.drainMu.RUnlock()
|
||||
if draining {
|
||||
rawStream.Close()
|
||||
fmt.Println("Draining")
|
||||
return errors.New("Draining")
|
||||
}
|
||||
// Read the observe request (with a generous deadline to avoid hangs).
|
||||
// Guard: the requesting peer must not be blacklisted or be ourself.
|
||||
did := ""
|
||||
access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
|
||||
res := access.Search(&dbs.Filters{
|
||||
And: map[string][]dbs.Filter{
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: remotePeerID}},
|
||||
},
|
||||
}, "", false, 0, 1)
|
||||
if len(res.Data) > 0 {
|
||||
p := res.Data[0].(*peer.Peer)
|
||||
did = p.GetID()
|
||||
if p.Relation == peer.BLACKLIST { // || p.Relation == peer.SELF
|
||||
rawStream.Close()
|
||||
fmt.Println("CLOSE blacklist or self")
|
||||
return errors.New("can't exploit blacklist or self")
|
||||
}
|
||||
}
|
||||
|
||||
// Replace any existing heartbeat goroutine for this observer.
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
s.observeCache.set(remotePeerID, cancel)
|
||||
fmt.Println("LOOP OBSERVE")
|
||||
go func() {
|
||||
defer rawStream.Close()
|
||||
defer cancel()
|
||||
defer s.observeCache.delete(remotePeerID)
|
||||
|
||||
ticker := time.NewTicker(observeHBInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
hbPayload, _ := json.Marshal(ObserveHeartbeat{State: "online"})
|
||||
evt := common.NewEvent(observeHBEventType, s.Host.ID().String(), nil, "", hbPayload)
|
||||
if evt == nil {
|
||||
return
|
||||
}
|
||||
if s.Streams, err = common.TempStream(s.Host, *ad, ProtocolObserve, did, s.Streams, protocols, &s.Mu); err == nil {
|
||||
stream := s.Streams[ProtocolObserve][ad.ID]
|
||||
if err := json.NewEncoder(stream.Stream).Encode(evt); err != nil {
|
||||
// Moderate connectivity event: the observer is unreachable.
|
||||
// The deferred calls above purge this observer from the cache.
|
||||
fmt.Println("LOOP EVT ERR", err)
|
||||
log.Info().
|
||||
Str("observer", remotePeerID).
|
||||
Err(err).
|
||||
Msg("[observe] heartbeat write failed — moderate connectivity event, purging observer from cache")
|
||||
return
|
||||
}
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
|
||||
rawStream.SetWriteDeadline(time.Now().Add(5 * time.Second))
|
||||
fmt.Println("LOOP EVT", evt)
|
||||
var err error
|
||||
if s.Streams, err = common.TempStream(s.Host, *ad, ProtocolObserve, did, s.Streams, protocols, &s.Mu); err == nil {
|
||||
stream := s.Streams[ProtocolObserve][ad.ID]
|
||||
if err := json.NewEncoder(stream.Stream).Encode(evt); err != nil {
|
||||
// Moderate connectivity event: the observer is unreachable.
|
||||
// The deferred calls above purge this observer from the cache.
|
||||
fmt.Println("LOOP EVT ERR", err)
|
||||
log.Info().
|
||||
Str("observer", remotePeerID).
|
||||
Err(err).
|
||||
Msg("[observe] heartbeat write failed — moderate connectivity event, purging observer from cache")
|
||||
return
|
||||
}
|
||||
}
|
||||
rawStream.SetWriteDeadline(time.Time{})
|
||||
}
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── heartbeat receiver (observing side) ───────────────────────────────────────
|
||||
|
||||
// handleObserveHeartbeat is called by readLoop when a heartbeat event arrives
|
||||
// on an outgoing ProtocolObserve stream. It queues the peer_id in the batch
|
||||
// accumulator; the batcher flushes to NATS after observeBatchWindow.
|
||||
func (ps *StreamService) handleObserveHeartbeat(evt *common.Event) error {
|
||||
// ps.hbBatcher.add(evt.From)
|
||||
flushObserveBatch([]string{evt.From})
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── user→peer index (ref-counted observe management) ─────────────────────────
|
||||
|
||||
// userPeerIndex records which users observe which peers. Callers use the
// first/last results of add, remove and removeUser to keep one observe
// stream per peer open for as long as at least one user watches that peer.
type userPeerIndex struct {
	mu    sync.Mutex
	index map[string]map[string]struct{} // user → set of peer_id strings
}

// newUserPeerIndex returns an empty index.
func newUserPeerIndex() *userPeerIndex {
	u := new(userPeerIndex)
	u.index = make(map[string]map[string]struct{})
	return u
}

// hasObserverLocked reports whether any user currently observes peerID.
// The caller must hold u.mu.
func (u *userPeerIndex) hasObserverLocked(peerID string) bool {
	for user := range u.index {
		if _, watching := u.index[user][peerID]; watching {
			return true
		}
	}
	return false
}

// add registers user as an observer of peerID.
// It returns true when no user at all (including this one) was observing
// peerID before the call.
func (u *userPeerIndex) add(user, peerID string) (isFirst bool) {
	u.mu.Lock()
	defer u.mu.Unlock()

	// Evaluate before inserting, otherwise the answer would always be false.
	isFirst = !u.hasObserverLocked(peerID)

	watched := u.index[user]
	if watched == nil {
		watched = make(map[string]struct{})
		u.index[user] = watched
	}
	watched[peerID] = struct{}{}
	return isFirst
}

// remove unregisters user from peerID and drops the user's entry once it is
// empty. It returns true when no user observes peerID after the call — which
// is also the case when nobody was observing it to begin with.
func (u *userPeerIndex) remove(user, peerID string) (isLast bool) {
	u.mu.Lock()
	defer u.mu.Unlock()

	if watched := u.index[user]; watched != nil {
		delete(watched, peerID)
	}
	if len(u.index[user]) == 0 {
		delete(u.index, user)
	}
	return !u.hasObserverLocked(peerID)
}

// removeUser drops every entry of user and returns the peer IDs that are
// left without any observer (those whose streams should be closed). The
// order of the result is unspecified; it is nil when there is no orphan.
func (u *userPeerIndex) removeUser(user string) []string {
	u.mu.Lock()
	defer u.mu.Unlock()

	released := u.index[user]
	delete(u.index, user)

	var orphans []string
	for peerID := range released {
		if u.hasObserverLocked(peerID) {
			continue
		}
		orphans = append(orphans, peerID)
	}
	return orphans
}
|
||||
|
||||
// ── NATS command handler (observing side) ─────────────────────────────────────
|
||||
|
||||
// HandleObserveNATSCommand processes a PEER_OBSERVE_EVENT received from oc-peer.
|
||||
func (ps *StreamService) HandleObserveNATSCommand(resp tools.NATSResponse) {
|
||||
log := oclib.GetLogger()
|
||||
var cmd ObserveCommand
|
||||
if err := json.Unmarshal(resp.Payload, &cmd); err != nil {
|
||||
log.Warn().Err(err).Msg("[observe] failed to unmarshal ObserveCommand")
|
||||
return
|
||||
}
|
||||
if cmd.CloseAll {
|
||||
log.Info().Msg("[observe] close-all received via NATS")
|
||||
ps.CloseAllObserves()
|
||||
return
|
||||
}
|
||||
if cmd.Close {
|
||||
for _, peerID := range cmd.PeerIDs {
|
||||
if isLast := ps.observeUsers.remove(cmd.User, peerID); isLast {
|
||||
if err := ps.closeObserveStream(peerID); err != nil {
|
||||
log.Warn().Str("peer", peerID).Err(err).Msg("[observe] closeObserveStream failed")
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
// Observe: open streams for any new peer, using the address from the payload.
|
||||
for _, p := range cmd.Peers {
|
||||
if isFirst := ps.observeUsers.add(cmd.User, p.PeerID); isFirst {
|
||||
if err := ps.openObserveStream(p); err != nil {
|
||||
// Roll back the index entry so the next NATS command can retry.
|
||||
ps.observeUsers.remove(cmd.User, p.PeerID)
|
||||
log.Warn().Str("peer", p.PeerID).Err(err).Msg("[observe] openObserveStream failed")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── outgoing observe management (observing side) ──────────────────────────────
|
||||
|
||||
// OpenObserveStream is the exported variant for inter-discovery propagation
|
||||
// (no user context available). It bypasses the user index and opens the stream
|
||||
// directly if not already open.
|
||||
func (ps *StreamService) OpenObserveStream(p ShallowPeer) error {
|
||||
return ps.openObserveStream(p)
|
||||
}
|
||||
|
||||
// CloseObserveStream is the exported variant for inter-discovery propagation.
|
||||
func (ps *StreamService) CloseObserveStream(toPeerID string) error {
|
||||
return ps.closeObserveStream(toPeerID)
|
||||
}
|
||||
|
||||
// openObserveStream opens a ProtocolObserve stream to p.
|
||||
// Uses p.StreamAddress directly; falls back to DB then DHT lookup if empty.
|
||||
func (ps *StreamService) openObserveStream(p ShallowPeer) error {
|
||||
streamAddr := p.StreamAddress
|
||||
fmt.Println("STREAM OBS", streamAddr)
|
||||
access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
|
||||
res := access.Search(&dbs.Filters{
|
||||
And: map[string][]dbs.Filter{
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: p.PeerID}},
|
||||
},
|
||||
}, "", false, 0, 1)
|
||||
if streamAddr == "" {
|
||||
// Fallback: DB then DHT.
|
||||
if len(res.Data) > 0 {
|
||||
streamAddr = res.Data[0].(*peer.Peer).StreamAddress
|
||||
} else if peers, err := ps.Node.GetPeerRecord(context.Background(), p.PeerID); err == nil && len(peers) > 0 {
|
||||
streamAddr = peers[0].StreamAddress
|
||||
}
|
||||
}
|
||||
if len(res.Data) > 0 && res.Data[0].(*peer.Peer).Relation == peer.SELF {
|
||||
return errors.New("Can't send to self")
|
||||
}
|
||||
fmt.Println("STREAM OBS SSS", streamAddr)
|
||||
|
||||
if streamAddr == "" {
|
||||
return nil // can't resolve address — silently skip
|
||||
}
|
||||
|
||||
decodedID, err := pp.Decode(p.PeerID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If a stream already exists, reuse it.
|
||||
ps.Mu.RLock()
|
||||
_, alreadyOpen := ps.Streams[ProtocolObserve][decodedID]
|
||||
ps.Mu.RUnlock()
|
||||
if alreadyOpen {
|
||||
return nil
|
||||
}
|
||||
ad, err := pp.AddrInfoFromString(streamAddr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Println("TempStream OBSERVE", ad)
|
||||
if ps.Streams, err = common.TempStream(ps.Host, *ad, ProtocolObserve, p.ID, ps.Streams, protocols, &ps.Mu); err == nil {
|
||||
rawStream := ps.Streams[ProtocolObserve][ad.ID]
|
||||
if hbPayload, err := json.Marshal(ObserveRequest{Close: false}); err == nil {
|
||||
if err := json.NewEncoder(rawStream.Stream).Encode(common.NewEvent(ProtocolObserve, ps.Host.ID().String(), nil, "", hbPayload)); err != nil {
|
||||
fmt.Println("ERR")
|
||||
rawStream.Stream.Close()
|
||||
return err
|
||||
}
|
||||
s := &common.Stream{
|
||||
Stream: rawStream.Stream,
|
||||
Expiry: time.Now().Add(365 * 24 * time.Hour),
|
||||
}
|
||||
ps.Mu.Lock()
|
||||
if ps.Streams[ProtocolObserve] == nil {
|
||||
ps.Streams[ProtocolObserve] = map[pp.ID]*common.Stream{}
|
||||
}
|
||||
ps.Streams[ProtocolObserve][ad.ID] = s
|
||||
ps.Mu.Unlock()
|
||||
|
||||
go ps.readLoop(s, ad.ID, ProtocolObserve, &common.ProtocolInfo{PersistantStream: true})
|
||||
}
|
||||
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// closeObserveStream closes the ProtocolObserve stream to toPeerID and notifies
|
||||
// the remote side.
|
||||
func (ps *StreamService) closeObserveStream(toPeerID string) error {
|
||||
decodedID, err := pp.Decode(toPeerID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ps.Mu.Lock()
|
||||
if ps.Streams[ProtocolObserve] != nil {
|
||||
if s, ok := ps.Streams[ProtocolObserve][decodedID]; ok {
|
||||
_ = json.NewEncoder(s.Stream).Encode(ObserveRequest{Close: true})
|
||||
s.Stream.Close()
|
||||
delete(ps.Streams[ProtocolObserve], decodedID)
|
||||
}
|
||||
}
|
||||
ps.Mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// CloseAllObserves closes every outgoing ProtocolObserve stream, clears the
|
||||
// user index, and enters drain mode for observeDrainDuration.
|
||||
func (ps *StreamService) CloseAllObserves() {
|
||||
ps.Mu.Lock()
|
||||
for _, s := range ps.Streams[ProtocolObserve] {
|
||||
_ = json.NewEncoder(s.Stream).Encode(ObserveRequest{Close: true})
|
||||
s.Stream.Close()
|
||||
}
|
||||
delete(ps.Streams, ProtocolObserve)
|
||||
ps.Mu.Unlock()
|
||||
|
||||
// Reset user index so stale ref-counts don't block future opens.
|
||||
ps.observeUsers = newUserPeerIndex()
|
||||
|
||||
ps.drainMu.Lock()
|
||||
ps.drainUntil = time.Now().Add(observeDrainDuration)
|
||||
ps.drainMu.Unlock()
|
||||
}
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"oc-discovery/daemons/node/common"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/dbs"
|
||||
@@ -19,9 +21,9 @@ func (ps *StreamService) PublishesCommon(dt *tools.DataType, user string, groups
|
||||
access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
|
||||
var p oclib.LibDataShallow
|
||||
if filter == nil {
|
||||
p = access.LoadAll(false)
|
||||
p = access.LoadAll(false, 0, 10000)
|
||||
} else {
|
||||
p = access.Search(filter, "", false)
|
||||
p = access.Search(filter, "", false, 0, 10000)
|
||||
}
|
||||
for _, pes := range p.Data {
|
||||
for _, proto := range protos {
|
||||
@@ -45,7 +47,7 @@ func (ps *StreamService) PublishCommon(dt *tools.DataType, user string, groups [
|
||||
And: map[string][]dbs.Filter{ // search by name if no filters are provided
|
||||
"peer_id": {{Operator: dbs.EQUAL.String(), Value: toPeerID}},
|
||||
},
|
||||
}, toPeerID, false)
|
||||
}, toPeerID, false, 0, 1)
|
||||
var pe *peer.Peer
|
||||
if len(p.Data) > 0 && p.Data[0].(*peer.Peer).Relation != peer.BLACKLIST {
|
||||
pe = p.Data[0].(*peer.Peer)
|
||||
@@ -57,13 +59,36 @@ func (ps *StreamService) PublishCommon(dt *tools.DataType, user string, groups [
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ps.write(toPeerID, ad, dt, user, resource, proto)
|
||||
stream, err := ps.write(toPeerID, ad, dt, user, resource, proto)
|
||||
if err != nil {
|
||||
if _, ok := dntProtocols[proto]; ok {
|
||||
ps.dnt.enqueue(&dntEntry{
|
||||
did: toPeerID,
|
||||
addr: *ad,
|
||||
dt: dt,
|
||||
user: user,
|
||||
payload: resource,
|
||||
proto: proto,
|
||||
addedAt: time.Now().UTC(),
|
||||
})
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return stream, nil
|
||||
}
|
||||
return nil, errors.New("peer unvalid " + toPeerID)
|
||||
}
|
||||
|
||||
func (ps *StreamService) ToPartnerPublishEvent(
|
||||
ctx context.Context, action tools.PubSubAction, dt *tools.DataType, user string, groups []string, payload []byte) error {
|
||||
var proto protocol.ID
|
||||
proto = ProtocolCreateResource
|
||||
switch action {
|
||||
case tools.PB_DELETE:
|
||||
proto = ProtocolDeleteResource
|
||||
case tools.PB_UPDATE:
|
||||
proto = ProtocolUpdateResource
|
||||
}
|
||||
if *dt == tools.PEER {
|
||||
var p peer.Peer
|
||||
if err := json.Unmarshal(payload, &p); err != nil {
|
||||
@@ -87,25 +112,30 @@ func (ps *StreamService) ToPartnerPublishEvent(
|
||||
|
||||
}
|
||||
}
|
||||
var per peer.Peer
|
||||
if err := json.Unmarshal(payload, &per); err == nil && !strings.Contains(per.Relation.String(), "master") && !strings.Contains(per.Relation.String(), "nano") {
|
||||
for _, rel := range []peer.PeerRelation{peer.MASTER, peer.NANO} {
|
||||
ps.PublishesCommon(dt, user, groups, &dbs.Filters{
|
||||
And: map[string][]dbs.Filter{
|
||||
"relation": {{Operator: dbs.EQUAL.String(), Value: rel}},
|
||||
},
|
||||
}, payload, proto)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
ks := []protocol.ID{}
|
||||
for k := range protocolsPartners {
|
||||
ks = append(ks, k)
|
||||
}
|
||||
var proto protocol.ID
|
||||
proto = ProtocolCreateResource
|
||||
switch action {
|
||||
case tools.PB_DELETE:
|
||||
proto = ProtocolDeleteResource
|
||||
case tools.PB_UPDATE:
|
||||
proto = ProtocolUpdateResource
|
||||
for _, rel := range []peer.PeerRelation{peer.PARTNER, peer.MASTER, peer.NANO} {
|
||||
ps.PublishesCommon(dt, user, groups, &dbs.Filters{
|
||||
And: map[string][]dbs.Filter{
|
||||
"relation": {{Operator: dbs.EQUAL.String(), Value: rel}},
|
||||
},
|
||||
}, payload, proto)
|
||||
}
|
||||
ps.PublishesCommon(dt, user, groups, &dbs.Filters{ // filter by like name, short_description, description, owner, url if no filters are provided
|
||||
And: map[string][]dbs.Filter{
|
||||
"relation": {{Operator: dbs.EQUAL.String(), Value: peer.PARTNER}},
|
||||
},
|
||||
}, payload, proto)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -129,7 +159,6 @@ func (s *StreamService) write(
|
||||
if s.Streams, err = common.TempStream(s.Host, *peerID, proto, did, s.Streams, pts, &s.Mu); err != nil {
|
||||
fmt.Println("TempStream", err)
|
||||
return nil, errors.New("no stream available for protocol " + fmt.Sprintf("%v", proto) + " from PID " + peerID.ID.String())
|
||||
|
||||
}
|
||||
|
||||
stream := s.Streams[proto][peerID.ID]
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/config"
|
||||
"cloud.o-forge.io/core/oc-lib/dbs"
|
||||
"cloud.o-forge.io/core/oc-lib/models/peer"
|
||||
"cloud.o-forge.io/core/oc-lib/models/utils"
|
||||
@@ -42,6 +43,7 @@ var protocols = map[protocol.ID]*common.ProtocolInfo{
|
||||
ProtocolVerifyResource: {WaitResponse: true, TTL: 1 * time.Minute},
|
||||
ProtocolMinioConfigResource: {WaitResponse: true, TTL: 1 * time.Minute},
|
||||
ProtocolAdmiraltyConfigResource: {WaitResponse: true, TTL: 1 * time.Minute},
|
||||
ProtocolObserve: {WaitResponse: true, TTL: 1 * time.Minute},
|
||||
}
|
||||
|
||||
var protocolsPartners = map[protocol.ID]*common.ProtocolInfo{
|
||||
@@ -61,6 +63,21 @@ type StreamService struct {
|
||||
// IsPeerKnown, when set, is called at stream open for every inbound protocol.
|
||||
// Return false to reset the stream immediately. Left nil until wired by the node.
|
||||
IsPeerKnown func(pid pp.ID) bool
|
||||
// dnt is the Disconnection Network Tolerance cache for outbound streams.
|
||||
dnt *dntCache
|
||||
// observeCache tracks running heartbeat goroutines on the OBSERVED side.
|
||||
observeCache *observeCache
|
||||
// hbBatcher accumulates incoming heartbeats (observing side) and flushes
|
||||
// them as a single NATS batch after observeBatchWindow.
|
||||
hbBatcher *heartbeatBatcher
|
||||
// drainUntil / drainMu implement the startup drain window: for 30 s after a
|
||||
// close-all, incoming ProtocolObserve requests are rejected so stale heartbeats
|
||||
// from a previous run cannot mix with fresh observations.
|
||||
drainUntil time.Time
|
||||
drainMu sync.RWMutex
|
||||
// observeUsers tracks which users are observing which peers so streams are
|
||||
// closed only when the last observer for a peer disconnects.
|
||||
observeUsers *userPeerIndex
|
||||
}
|
||||
|
||||
func InitStream(ctx context.Context, h host.Host, key pp.ID, maxNode int, node common.DiscoveryPeer) (*StreamService, error) {
|
||||
@@ -72,31 +89,60 @@ func InitStream(ctx context.Context, h host.Host, key pp.ID, maxNode int, node c
|
||||
Streams: common.ProtocolStream{},
|
||||
maxNodesConn: maxNode,
|
||||
ResourceSearches: common.NewSearchTracker(),
|
||||
dnt: newDNTCache(),
|
||||
observeCache: newObserveCache(),
|
||||
observeUsers: newUserPeerIndex(),
|
||||
}
|
||||
service.hbBatcher = newHeartbeatBatcher(flushObserveBatch)
|
||||
for proto := range protocols {
|
||||
service.Host.SetStreamHandler(proto, service.gate(service.HandleResponse))
|
||||
}
|
||||
// ProtocolObserve uses a dedicated handler (bidirectional, long-lived).
|
||||
logger.Info().Msg("connect to partners...")
|
||||
service.connectToPartners() // we set up a stream
|
||||
go service.StartGC(8 * time.Second)
|
||||
go service.startDNTLoop()
|
||||
return service, nil
|
||||
}
|
||||
|
||||
// gate wraps a stream handler with IsPeerKnown validation.
|
||||
// If the peer is unknown the entire connection is closed and the handler is not called.
|
||||
// IsPeerKnown is read at stream-open time so it works even when set after InitStream.
|
||||
func (s *StreamService) gatePrivilege(h func(network.Stream)) func(network.Stream) {
|
||||
return func(stream network.Stream) {
|
||||
if config.GetConfig().IsNano {
|
||||
d := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).Search(&dbs.Filters{
|
||||
And: map[string][]dbs.Filter{
|
||||
"relation": {{Operator: dbs.EQUAL.String(), Value: peer.MASTER}},
|
||||
},
|
||||
}, "", false, 0, 1)
|
||||
if len(d.Data) == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
s.knowingGate(stream, h)
|
||||
}
|
||||
}
|
||||
|
||||
// gate wraps a stream handler with IsPeerKnown validation.
|
||||
// If the peer is unknown the entire connection is closed and the handler is not called.
|
||||
// IsPeerKnown is read at stream-open time so it works even when set after InitStream.
|
||||
func (s *StreamService) gate(h func(network.Stream)) func(network.Stream) {
|
||||
return func(stream network.Stream) {
|
||||
if s.IsPeerKnown != nil && !s.IsPeerKnown(stream.Conn().RemotePeer()) {
|
||||
logger := oclib.GetLogger()
|
||||
logger.Warn().Str("peer", stream.Conn().RemotePeer().String()).Msg("[stream] unknown peer, closing connection")
|
||||
stream.Conn().Close()
|
||||
return
|
||||
}
|
||||
h(stream)
|
||||
s.knowingGate(stream, h)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *StreamService) knowingGate(stream network.Stream, h func(network.Stream)) {
|
||||
if s.IsPeerKnown != nil && !s.IsPeerKnown(stream.Conn().RemotePeer()) {
|
||||
logger := oclib.GetLogger()
|
||||
logger.Warn().Str("peer", stream.Conn().RemotePeer().String()).Msg("[stream] unknown peer, closing connection")
|
||||
stream.Conn().Close()
|
||||
return
|
||||
}
|
||||
h(stream)
|
||||
}
|
||||
|
||||
func (s *StreamService) HandleResponse(stream network.Stream) {
|
||||
s.Mu.Lock()
|
||||
defer s.Mu.Unlock()
|
||||
@@ -137,13 +183,27 @@ func (s *StreamService) connectToPartners() error {
|
||||
go s.readLoop(s.Streams[proto][ss.Conn().RemotePeer()], ss.Conn().RemotePeer(), proto, info)
|
||||
}
|
||||
logger.Info().Msg("SetStreamHandler " + string(proto))
|
||||
s.Host.SetStreamHandler(proto, s.gate(f))
|
||||
s.Host.SetStreamHandler(proto, s.gatePrivilege(f))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *StreamService) searchPeer(search string) ([]*peer.Peer, error) {
|
||||
ps := []*peer.Peer{}
|
||||
if conf.GetConfig().NanoIDS != "" {
|
||||
for _, peerID := range strings.Split(conf.GetConfig().NanoIDS, ",") {
|
||||
ppID := strings.Split(peerID, "/")
|
||||
ps = append(ps, &peer.Peer{
|
||||
AbstractObject: utils.AbstractObject{
|
||||
UUID: uuid.New().String(),
|
||||
Name: ppID[1],
|
||||
},
|
||||
PeerID: ppID[len(ppID)-1],
|
||||
StreamAddress: peerID,
|
||||
Relation: peer.NANO,
|
||||
})
|
||||
}
|
||||
}
|
||||
if conf.GetConfig().PeerIDS != "" {
|
||||
for _, peerID := range strings.Split(conf.GetConfig().PeerIDS, ",") {
|
||||
ppID := strings.Split(peerID, "/")
|
||||
@@ -159,7 +219,7 @@ func (s *StreamService) searchPeer(search string) ([]*peer.Peer, error) {
|
||||
}
|
||||
}
|
||||
access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
|
||||
peers := access.Search(nil, search, false)
|
||||
peers := access.Search(nil, search, false, 0, 0)
|
||||
for _, p := range peers.Data {
|
||||
ps = append(ps, p.(*peer.Peer))
|
||||
}
|
||||
@@ -230,7 +290,7 @@ func (ps *StreamService) readLoop(s *common.Stream, id pp.ID, proto protocol.ID,
|
||||
}
|
||||
continue
|
||||
}
|
||||
ps.handleEvent(evt.Type, &evt)
|
||||
ps.handleEvent(evt.Type, &evt, s.Stream)
|
||||
if protocolInfo.WaitResponse && !protocolInfo.PersistantStream {
|
||||
break
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user