// Source: oc-discovery/daemons/node/indexer/nameindex.go

package indexer
import (
"context"
"encoding/json"
"strings"
"sync"
"time"
"oc-discovery/daemons/node/common"
oclib "cloud.o-forge.io/core/oc-lib"
pubsub "github.com/libp2p/go-libp2p-pubsub"
pp "github.com/libp2p/go-libp2p/core/peer"
)
// Gossip settings for the distributed name index.
const (
	// TopicNameIndex is the GossipSub topic on which indexers exchange
	// add/delete events for the distributed name→peerID mapping.
	TopicNameIndex = "oc-name-index"

	// nameIndexDedupWindow is how long an identical (action, name, peerID)
	// tuple is withheld from re-emission by the sender-side dedup tracker.
	// The purge ticker for that tracker runs at the same interval.
	nameIndexDedupWindow = 30 * time.Second
)
// NameIndexAction is the kind of change carried by a NameIndexEvent: a name
// mapping being added or removed.
type NameIndexAction string

// Actions understood by the name index; any other value is ignored by the
// event handler.
const (
	// NameIndexAdd inserts or updates a name mapping.
	NameIndexAdd = NameIndexAction("add")
	// NameIndexDelete removes a name mapping.
	NameIndexDelete = NameIndexAction("delete")
)
// NameIndexEvent is the JSON payload published on TopicNameIndex to announce
// that a name mapping was added (node registration) or deleted (node evicted
// by the GC).
type NameIndexEvent struct {
	// Action selects the operation to apply to the index.
	Action NameIndexAction `json:"action"`
	// Name is the peer's name; receivers lowercase it before indexing.
	Name string `json:"name"`
	// PeerID identifies the peer the mapping belongs to.
	PeerID string `json:"peer_id"`
	// DID is the value stored for the (name, peerID) pair.
	DID string `json:"did"`
}
// nameIndexState holds the local in-memory name index and the sender-side
// deduplication tracker.
//
// Search strategy: trigram inverted index.
//   - byName: lowercased name → peerID → DID (for delete and exact resolution)
//   - byPeer: peerID → lowercased name (to recompute trigrams on delete)
//   - trigrams: 3-byte substring → set of peerIDs (narrows the candidates
//     for a substring search; candidates are still verified against the name)
//
// For needles shorter than 3 bytes the trigram index cannot help; a linear
// scan of byName is used as fallback.
type nameIndexState struct {
	byName   map[string]map[string]string   // name → peerID → DID
	byPeer   map[string]string              // peerID → name
	trigrams map[string]map[string]struct{} // trigram → peerID set
	// indexMu guards byName, byPeer and trigrams.
	indexMu sync.RWMutex

	// emitted records when each "action:name:peerID" key was last emitted so
	// identical emissions within nameIndexDedupWindow can be suppressed.
	// Purged periodically to prevent unbounded growth.
	emitted map[string]time.Time
	// emittedMu guards emitted.
	emittedMu sync.Mutex
}
// trigramsOf splits s (expected to be lowercased already) into every
// overlapping window of three bytes, in order of appearance.
// A string shorter than three bytes — including the empty string — yields a
// single token equal to s itself.
func trigramsOf(s string) []string {
	const width = 3
	n := len(s)
	if n < width {
		return []string{s}
	}
	grams := make([]string, n-width+1)
	for start := range grams {
		grams[start] = s[start : start+width]
	}
	return grams
}
// addTrigrams registers peerID in the bucket of every trigram of name,
// creating buckets on first use. It does no locking itself.
func (s *nameIndexState) addTrigrams(name, peerID string) {
	for _, gram := range trigramsOf(name) {
		bucket := s.trigrams[gram]
		if bucket == nil {
			bucket = map[string]struct{}{}
			s.trigrams[gram] = bucket
		}
		bucket[peerID] = struct{}{}
	}
}
// removeTrigrams unregisters peerID from the bucket of every trigram of name
// and drops any bucket that becomes empty, so the trigram map does not keep
// dead keys around. It does no locking itself.
func (s *nameIndexState) removeTrigrams(name, peerID string) {
	for _, gram := range trigramsOf(name) {
		bucket := s.trigrams[gram]
		if bucket == nil {
			continue
		}
		delete(bucket, peerID)
		if len(bucket) > 0 {
			continue
		}
		delete(s.trigrams, gram)
	}
}
2026-03-03 16:38:24 +01:00
// shouldEmit returns true if the (action, name, peerID) tuple has not been
// emitted within nameIndexDedupWindow, updating the tracker if so.
2026-03-11 16:28:15 +01:00
//
// On DELETE: the ADD entry for the same peer is immediately removed — the peer
// is gone, keeping it would cause the map to grow with departed peers forever.
// The DELETE entry itself is kept for the dedup window to absorb duplicate
// delete events, then cleaned by the purgeEmitted ticker.
2026-03-03 16:38:24 +01:00
func (s *nameIndexState) shouldEmit(action NameIndexAction, name, peerID string) bool {
key := string(action) + ":" + name + ":" + peerID
s.emittedMu.Lock()
defer s.emittedMu.Unlock()
if t, ok := s.emitted[key]; ok && time.Since(t) < nameIndexDedupWindow {
return false
}
s.emitted[key] = time.Now()
2026-03-11 16:28:15 +01:00
if action == NameIndexDelete {
// Peer is leaving: drop its ADD entry — no longer needed.
delete(s.emitted, string(NameIndexAdd)+":"+name+":"+peerID)
}
2026-03-03 16:38:24 +01:00
return true
}
2026-03-11 16:28:15 +01:00
// purgeEmitted drops every entry of the emitted dedup map whose timestamp is
// at least nameIndexDedupWindow old, whatever its action. Such entries can no
// longer suppress an emission, so removing them only frees memory. It is
// driven by the ticker started in initNameIndex.
func (s *nameIndexState) purgeEmitted() {
	// Take the reference time before contending for the lock.
	now := time.Now()

	s.emittedMu.Lock()
	defer s.emittedMu.Unlock()

	for key, stamp := range s.emitted {
		if age := now.Sub(stamp); age < nameIndexDedupWindow {
			continue
		}
		delete(s.emitted, key)
	}
}
2026-03-03 16:38:24 +01:00
// onEvent applies a received NameIndexEvent to the local index.
// "add" inserts/updates the mapping; "delete" removes it.
// Operations are idempotent — duplicate events from multiple indexers are harmless.
func (s *nameIndexState) onEvent(evt NameIndexEvent) {
if evt.Name == "" || evt.PeerID == "" {
return
}
2026-03-11 16:28:15 +01:00
nameLow := strings.ToLower(evt.Name)
2026-03-03 16:38:24 +01:00
s.indexMu.Lock()
defer s.indexMu.Unlock()
switch evt.Action {
case NameIndexAdd:
2026-03-11 16:28:15 +01:00
// If the peer previously had a different name, clean up old trigrams.
if old, ok := s.byPeer[evt.PeerID]; ok && old != nameLow {
s.removeTrigrams(old, evt.PeerID)
if s.byName[old] != nil {
delete(s.byName[old], evt.PeerID)
if len(s.byName[old]) == 0 {
delete(s.byName, old)
}
}
2026-03-03 16:38:24 +01:00
}
2026-03-11 16:28:15 +01:00
if s.byName[nameLow] == nil {
s.byName[nameLow] = map[string]string{}
}
s.byName[nameLow][evt.PeerID] = evt.DID
s.byPeer[evt.PeerID] = nameLow
s.addTrigrams(nameLow, evt.PeerID)
2026-03-03 16:38:24 +01:00
case NameIndexDelete:
2026-03-11 16:28:15 +01:00
// Use stored name so trigrams match exactly what was indexed.
name := nameLow
if stored, ok := s.byPeer[evt.PeerID]; ok {
name = stored
}
s.removeTrigrams(name, evt.PeerID)
delete(s.byPeer, evt.PeerID)
if s.byName[name] != nil {
delete(s.byName[name], evt.PeerID)
if len(s.byName[name]) == 0 {
delete(s.byName, name)
2026-03-03 16:38:24 +01:00
}
}
}
}
// initNameIndex joins TopicNameIndex and starts consuming events.
// Must be called after ix.PS is ready.
func (ix *IndexerService) initNameIndex(ps *pubsub.PubSub) {
logger := oclib.GetLogger()
2026-03-11 16:28:15 +01:00
state := &nameIndexState{
byName: map[string]map[string]string{},
byPeer: map[string]string{},
trigrams: map[string]map[string]struct{}{},
emitted: map[string]time.Time{},
2026-03-03 16:38:24 +01:00
}
2026-03-11 16:28:15 +01:00
ix.nameIndex = state
// Periodically purge the emitted dedup map so it doesn't grow forever.
go func() {
t := time.NewTicker(nameIndexDedupWindow)
defer t.Stop()
for range t.C {
state.purgeEmitted()
}
}()
2026-03-03 16:38:24 +01:00
ps.RegisterTopicValidator(TopicNameIndex, func(_ context.Context, _ pp.ID, _ *pubsub.Message) bool {
return true
})
topic, err := ps.Join(TopicNameIndex)
if err != nil {
logger.Err(err).Msg("name index: failed to join topic")
return
}
ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.Lock()
ix.LongLivedStreamRecordedService.LongLivedPubSubService.LongLivedPubSubs[TopicNameIndex] = topic
ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.Unlock()
common.SubscribeEvents(
ix.LongLivedStreamRecordedService.LongLivedPubSubService,
context.Background(),
TopicNameIndex,
-1,
func(_ context.Context, evt NameIndexEvent, _ string) {
ix.nameIndex.onEvent(evt)
},
)
}
// publishNameEvent emits a NameIndexEvent on TopicNameIndex, subject to the
// sender-side deduplication window.
//
// It is a no-op when the name index is not initialised, when name or peerID
// is empty, or when the topic has not been registered in LongLivedPubSubs.
// The topic is resolved before the dedup tracker is consulted so that a dedup
// slot is only consumed when publishing is actually possible. Encoding and
// publish failures are logged; the function never returns an error.
func (ix *IndexerService) publishNameEvent(action NameIndexAction, name, peerID, did string) {
	if ix.nameIndex == nil || name == "" || peerID == "" {
		return
	}

	ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.RLock()
	topic := ix.LongLivedStreamRecordedService.LongLivedPubSubService.LongLivedPubSubs[TopicNameIndex]
	ix.LongLivedStreamRecordedService.LongLivedPubSubService.PubsubMu.RUnlock()
	if topic == nil {
		return
	}

	if !ix.nameIndex.shouldEmit(action, name, peerID) {
		return
	}

	logger := oclib.GetLogger()
	payload, err := json.Marshal(NameIndexEvent{Action: action, Name: name, PeerID: peerID, DID: did})
	if err != nil {
		logger.Err(err).Msg("name index: failed to encode event")
		return
	}
	if err := topic.Publish(context.Background(), payload); err != nil {
		logger.Err(err).Msg("name index: failed to publish event")
	}
}
// LookupNameIndex searches the distributed name index for peers whose name
// contains needle (case-insensitive). Returns peerID → DID for matched peers.
2026-03-11 16:28:15 +01:00
// Returns nil if the name index is not initialised.
//
// Algorithm:
// - needle ≥ 3 chars: trigram intersection → O(|candidates|) verify pass.
// The trigram index immediately narrows the candidate set; false positives
// are eliminated by the full-string contains check.
// - needle < 3 chars: linear scan of byName (rare, still fast at small N).
2026-03-03 16:38:24 +01:00
func (ix *IndexerService) LookupNameIndex(needle string) map[string]string {
if ix.nameIndex == nil {
return nil
}
needleLow := strings.ToLower(needle)
2026-03-11 16:28:15 +01:00
result := map[string]string{}
2026-03-03 16:38:24 +01:00
ix.nameIndex.indexMu.RLock()
defer ix.nameIndex.indexMu.RUnlock()
2026-03-11 16:28:15 +01:00
if len(needleLow) < 3 {
// Short needle: linear scan fallback.
for name, peers := range ix.nameIndex.byName {
if strings.Contains(name, needleLow) {
for peerID, did := range peers {
result[peerID] = did
}
}
}
return result
}
// Trigram intersection: start with the first trigram's set, then
// progressively intersect with each subsequent trigram's set.
tgs := trigramsOf(needleLow)
var candidates map[string]struct{}
for _, tg := range tgs {
set := ix.nameIndex.trigrams[tg]
if len(set) == 0 {
return result // any empty trigram set → no possible match
}
if candidates == nil {
candidates = make(map[string]struct{}, len(set))
for pid := range set {
candidates[pid] = struct{}{}
}
} else {
for pid := range candidates {
if _, ok := set[pid]; !ok {
delete(candidates, pid)
}
}
}
if len(candidates) == 0 {
return result
}
}
// Full-string verification pass: trigrams admit false positives
// (e.g. "abc" and "bca" share the trigram "bc_" with a rotated name).
for peerID := range candidates {
name := ix.nameIndex.byPeer[peerID]
if strings.Contains(name, needleLow) {
did := ""
if m := ix.nameIndex.byName[name]; m != nil {
did = m[peerID]
2026-03-03 16:38:24 +01:00
}
2026-03-11 16:28:15 +01:00
result[peerID] = did
2026-03-03 16:38:24 +01:00
}
}
return result
}